/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/user.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/tuneable.h>
#include <vm/hat.h>
#include <vm/seg.h>
#include <vm/as.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <sys/buf.h>
#include <sys/swap.h>
#include <sys/atomic.h>
#include <vm/seg_spt.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/shm.h>
#include <sys/lgrp.h>
#include <sys/vmsystm.h>

#include <sys/tnf_probe.h>

#define	SEGSPTADDR	(caddr_t)0x0

/*
 * # pages used for spt
 */
static size_t	spt_used;

/*
 * segspt_minfree is the memory left for the system after ISM
 * has locked its pages; it is set to 5% of availrmem in
 * sptcreate() when ISM is created.  ISM should not use more
 * than ~90% of availrmem; if it does, the performance of the
 * system may decrease.  Machines with large memories may be
 * able to use more memory for ISM, so we set the default
 * segspt_minfree to 5% (which gives ISM a maximum of 95% of
 * availrmem).  If somebody wants even more memory for ISM
 * (risking hanging the system) they can patch segspt_minfree
 * to a smaller number.
 */
pgcnt_t segspt_minfree = 0;
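/*
 * Illustrative arithmetic only: with availrmem at 1,000,000 pages,
 * sptcreate() below defaults segspt_minfree to availrmem / 20 =
 * 50,000 pages, i.e. roughly 5% of availrmem is kept back for the
 * rest of the system and ISM is expected to lock at most the
 * remaining ~95%.
 */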

static int segspt_create(struct seg *seg, caddr_t argsp);
static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_free(struct seg *seg);
static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len);
static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr);

static void
segspt_badop()
{
	panic("segspt_badop called");
	/*NOTREACHED*/
}

#define	SEGSPT_BADOP(t)	(t(*)())segspt_badop

struct seg_ops segspt_ops = {
	SEGSPT_BADOP(int),		/* dup */
	segspt_unmap,
	segspt_free,
	SEGSPT_BADOP(int),		/* fault */
	SEGSPT_BADOP(faultcode_t),	/* faulta */
	SEGSPT_BADOP(int),		/* setprot */
	SEGSPT_BADOP(int),		/* checkprot */
	SEGSPT_BADOP(int),		/* kluster */
	SEGSPT_BADOP(size_t),		/* swapout */
	SEGSPT_BADOP(int),		/* sync */
	SEGSPT_BADOP(size_t),		/* incore */
	SEGSPT_BADOP(int),		/* lockop */
	SEGSPT_BADOP(int),		/* getprot */
	SEGSPT_BADOP(u_offset_t),	/* getoffset */
	SEGSPT_BADOP(int),		/* gettype */
	SEGSPT_BADOP(int),		/* getvp */
	SEGSPT_BADOP(int),		/* advise */
	SEGSPT_BADOP(void),		/* dump */
	SEGSPT_BADOP(int),		/* pagelock */
	SEGSPT_BADOP(int),		/* setpgsz */
	SEGSPT_BADOP(int),		/* getmemid */
	segspt_getpolicy,		/* getpolicy */
};
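/*
 * Note that segspt_ops above backs only the dummy "spt" segment that
 * lives in the dummy address space created by sptcreate() and owns the
 * shared pages; only unmap, free and getpolicy are expected on it, so
 * every other entry point panics via segspt_badop().  The
 * segspt_shmops vector declared below is the one installed in each
 * process's shared memory segment and implements the full interface.
 */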
static int segspt_shmdup(struct seg *seg, struct seg *newseg);
static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_shmfree(struct seg *seg);
static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg,
		caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr);
static int segspt_shmsetprot(register struct seg *seg, register caddr_t addr,
		register size_t len, register uint_t prot);
static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size,
		uint_t prot);
static int segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta);
static size_t segspt_shmswapout(struct seg *seg);
static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len,
		register char *vec);
static int segspt_shmsync(struct seg *seg, register caddr_t addr, size_t len,
		int attr, uint_t flags);
static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
		int attr, int op, ulong_t *lockmap, size_t pos);
static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len,
		uint_t *protv);
static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr);
static int segspt_shmgettype(struct seg *seg, caddr_t addr);
static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len,
		uint_t behav);
static void segspt_shmdump(struct seg *seg);
static int segspt_shmpagelock(struct seg *, caddr_t, size_t,
		struct page ***, enum lock_type, enum seg_rw);
static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t);
static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *);
static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t);

struct seg_ops segspt_shmops = {
	segspt_shmdup,
	segspt_shmunmap,
	segspt_shmfree,
	segspt_shmfault,
	segspt_shmfaulta,
	segspt_shmsetprot,
	segspt_shmcheckprot,
	segspt_shmkluster,
	segspt_shmswapout,
	segspt_shmsync,
	segspt_shmincore,
	segspt_shmlockop,
	segspt_shmgetprot,
	segspt_shmgetoffset,
	segspt_shmgettype,
	segspt_shmgetvp,
	segspt_shmadvise,	/* advise */
	segspt_shmdump,
	segspt_shmpagelock,
	segspt_shmsetpgsz,
	segspt_shmgetmemid,
	segspt_shmgetpolicy,
};

static void segspt_purge(struct seg *seg);
static int segspt_reclaim(struct seg *, caddr_t, size_t, struct page **,
		enum seg_rw);
static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
		page_t **ppa);



/*ARGSUSED*/
int
sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
	uint_t prot, uint_t flags, uint_t share_szc)
{
	int	err;
	struct	as	*newas;
	struct	segspt_crargs sptcargs;

#ifdef DEBUG
	TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
	    tnf_ulong, size, size);
#endif
	if (segspt_minfree == 0)	/* leave min 5% of availrmem for */
		segspt_minfree = availrmem/20;	/* for the system */

	if (!hat_supported(HAT_SHARED_PT, (void *)0))
		return (EINVAL);

	/*
	 * get a new as for this shared memory segment
	 */
	newas = as_alloc();
	sptcargs.amp = amp;
	sptcargs.prot = prot;
	sptcargs.flags = flags;
	sptcargs.szc = share_szc;

	/*
	 * create a shared page table (spt) segment
	 */

	if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) {
		as_free(newas);
		return (err);
	}
	*sptseg = sptcargs.seg_spt;
	return (0);
}

void
sptdestroy(struct as *as, struct anon_map *amp)
{

#ifdef DEBUG
	TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
#endif
	(void) as_unmap(as, SEGSPTADDR, amp->size);
	as_free(as);
}
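/*
 * Typical life cycle of the dummy spt segment, shown here only as a
 * hypothetical caller sketch (the real callers live in the System V
 * shared memory code and may differ in detail):
 *
 *	struct seg *sptseg;
 *
 *	if (sptcreate(size, &sptseg, amp, prot, flags, share_szc) == 0) {
 *		... processes attach and detach through segments that
 *		    use the segspt_shmops driver ...
 *		sptdestroy(sptseg->s_as, amp);	(last detach tears it down)
 *	}
 */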
/*
 * called from seg_free().
 * free (i.e., unlock, unmap, return to free list)
 * all the pages in the given seg.
 */
void
segspt_free(struct seg	*seg)
{
	struct spt_data *sptd = (struct spt_data *)seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	if (sptd != NULL) {
		if (sptd->spt_realsize)
			segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);

		if (sptd->spt_ppa_lckcnt)
			kmem_free(sptd->spt_ppa_lckcnt,
			    sizeof (*sptd->spt_ppa_lckcnt)
			    * btopr(sptd->spt_amp->size));
		kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
		mutex_destroy(&sptd->spt_lock);
		kmem_free(sptd, sizeof (*sptd));
	}
}

/*ARGSUSED*/
static int
segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
	uint_t flags)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	return (0);
}

/*ARGSUSED*/
static size_t
segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	caddr_t	eo_seg;
	pgcnt_t	npages;
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg;
	struct spt_data *sptd;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
#ifdef lint
	seg = seg;
#endif
	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		eo_seg = addr + len;
		while (addr < eo_seg) {
			/* page exists, and it's locked. */
			*vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
			    SEG_PAGE_ANON;
			addr += PAGESIZE;
		}
		return (len);
	} else {
		struct	anon_map *amp = shmd->shm_amp;
		struct	anon	*ap;
		page_t		*pp;
		pgcnt_t		anon_index;
		struct vnode	*vp;
		u_offset_t	off;
		ulong_t		i;
		int		ret;
		anon_sync_obj_t	cookie;

		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
		anon_index = seg_page(seg, addr);
		npages = btopr(len);
		if (anon_index + npages > btopr(shmd->shm_amp->size)) {
			return (EINVAL);
		}
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
		for (i = 0; i < npages; i++, anon_index++) {
			ret = 0;
			anon_array_enter(amp, anon_index, &cookie);
			ap = anon_get_ptr(amp->ahp, anon_index);
			if (ap != NULL) {
				swap_xlate(ap, &vp, &off);
				anon_array_exit(&cookie);
				pp = page_lookup_nowait(vp, off, SE_SHARED);
				if (pp != NULL) {
					ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON;
					page_unlock(pp);
				}
			} else {
				anon_array_exit(&cookie);
			}
			if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
				ret |= SEG_PAGE_LOCKED;
			}
			*vec++ = (char)ret;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);
		return (len);
	}
}

static int
segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
{
	size_t share_size;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * seg.s_size may have been rounded up to the largest page size
	 * in shmat().
	 * XXX This should be cleaned up.  sptdestroy should take a length
	 * argument which should be the same as sptcreate.  Then
	 * this rounding would not be needed (or is done in shm.c)
	 * Only the check for full segment will be needed.
	 *
	 * XXX -- shouldn't raddr == 0 always? These tests don't seem
	 * to be useful at all.
	 */
	share_size = page_get_pagesize(seg->s_szc);
	ssize = P2ROUNDUP(ssize, share_size);

	if (raddr == seg->s_base && ssize == seg->s_size) {
		seg_free(seg);
		return (0);
	} else
		return (EINVAL);
}

int
segspt_create(struct seg *seg, caddr_t argsp)
{
	int		err;
	caddr_t		addr = seg->s_base;
	struct spt_data *sptd;
	struct segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
	struct anon_map *amp = sptcargs->amp;
	struct cred	*cred = CRED();
	ulong_t		i, j, anon_index = 0;
	pgcnt_t		npages = btopr(amp->size);
	struct vnode	*vp;
	page_t		**ppa;
	uint_t		hat_flags;

	/*
	 * We are holding the a_lock on the underlying dummy as,
	 * so we can make calls to the HAT layer.
	 */
	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

#ifdef DEBUG
	TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
	    tnf_opaque, addr, addr,
	    tnf_ulong, len, seg->s_size);
#endif
	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
		if (err = anon_swap_adjust(npages))
			return (err);
	}
	err = ENOMEM;

	if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
		goto out1;

	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
		if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
		    KM_NOSLEEP)) == NULL)
			goto out2;
	}

	mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL);

	if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL)
		goto out3;

	seg->s_ops = &segspt_ops;
	sptd->spt_vp = vp;
	sptd->spt_amp = amp;
	sptd->spt_prot = sptcargs->prot;
	sptd->spt_flags = sptcargs->flags;
	seg->s_data = (caddr_t)sptd;
	sptd->spt_ppa = NULL;
	sptd->spt_ppa_lckcnt = NULL;
	seg->s_szc = sptcargs->szc;

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
	amp->a_szc = seg->s_szc;
	ANON_LOCK_EXIT(&amp->a_rwlock);

	/*
	 * Set policy to affect initial allocation of pages in
	 * anon_map_createpages()
	 */
	(void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index,
	    NULL, 0, ptob(npages));

	if (sptcargs->flags & SHM_PAGEABLE) {
		size_t	share_sz;
		pgcnt_t	new_npgs, more_pgs;
		struct anon_hdr *nahp;

		share_sz = page_get_pagesize(seg->s_szc);
		if (!IS_P2ALIGNED(amp->size, share_sz)) {
			/*
			 * We are rounding up the size of the anon array
			 * on a 4M boundary because we always create 4M
			 * worth of pages when locking or faulting them in,
			 * and so we do not have to check all the corner
			 * cases, e.g. whether there is enough space to
			 * allocate a 4M page.
			 */
			new_npgs = btop(P2ROUNDUP(amp->size, share_sz));
			more_pgs = new_npgs - npages;

			if (anon_resv(ptob(more_pgs)) == 0) {
				err = ENOMEM;
				goto out4;
			}
			nahp = anon_create(new_npgs, ANON_SLEEP);
			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
			(void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages,
			    ANON_SLEEP);
			anon_release(amp->ahp, npages);
			amp->ahp = nahp;
			amp->swresv = amp->size = ptob(new_npgs);
			ANON_LOCK_EXIT(&amp->a_rwlock);
			npages = new_npgs;
		}

		sptd->spt_ppa_lckcnt = kmem_zalloc(npages *
		    sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP);
		sptd->spt_pcachecnt = 0;
		sptd->spt_realsize = ptob(npages);
		sptcargs->seg_spt = seg;
		return (0);
	}

	/*
	 * get array of pages for each anon slot in amp
	 */
	if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa,
	    seg, addr, S_CREATE, cred)) != 0)
		goto out4;

	/*
	 * addr is initial address corresponding to the first page on ppa list
	 */
	for (i = 0; i < npages; i++) {
		/* attempt to lock all pages */
		if (!page_pp_lock(ppa[i], 0, 1)) {
			/*
			 * if unable to lock any page, unlock all
			 * of them and return error
			 */
			for (j = 0; j < i; j++)
				page_pp_unlock(ppa[j], 0, 1);
			for (i = 0; i < npages; i++) {
				page_unlock(ppa[i]);
			}
			err = ENOMEM;
			goto out4;
		}
	}

	/*
	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
	 * for the entire life of the segment, for example platforms
	 * that do not support Dynamic Reconfiguration.
	 */
	hat_flags = HAT_LOAD_SHARE;
	if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
		hat_flags |= HAT_LOAD_LOCK;

	hat_memload_array(seg->s_as->a_hat, addr, ptob(npages),
	    ppa, sptd->spt_prot, hat_flags);

	/*
	 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
	 * we will leave the pages locked SE_SHARED for the life
	 * of the ISM segment.  This will prevent any calls to
	 * hat_pageunload() on this ISM segment for those platforms.
	 */
	if (!(hat_flags & HAT_LOAD_LOCK)) {
		/*
		 * On platforms that support HAT_DYNAMIC_ISM_UNMAP,
		 * we no longer need to hold the SE_SHARED lock on the pages,
		 * since L_PAGELOCK and F_SOFTLOCK calls will grab the
		 * SE_SHARED lock on the pages as necessary.
		 */
		for (i = 0; i < npages; i++)
			page_unlock(ppa[i]);
	}
	sptd->spt_pcachecnt = 0;
	kmem_free(ppa, ((sizeof (page_t *)) * npages));
	sptd->spt_realsize = ptob(npages);
	atomic_add_long(&spt_used, npages);
	sptcargs->seg_spt = seg;
	return (0);

out4:
	seg->s_data = NULL;
	kmem_free(vp, sizeof (*vp));
out3:
	mutex_destroy(&sptd->spt_lock);
	if ((sptcargs->flags & SHM_PAGEABLE) == 0)
		kmem_free(ppa, (sizeof (*ppa) * npages));
out2:
	kmem_free(sptd, sizeof (*sptd));
out1:
	if ((sptcargs->flags & SHM_PAGEABLE) == 0)
		anon_swap_restore(npages);
	return (err);
}

/*ARGSUSED*/
void
segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
{
	struct page	*pp;
	struct spt_data *sptd = (struct spt_data *)seg->s_data;
	pgcnt_t		npages;
	ulong_t		anon_idx;
	struct anon_map *amp;
	struct anon	*ap;
	struct vnode	*vp;
	u_offset_t	off;
	uint_t		hat_flags;
	int		root = 0;
	pgcnt_t		pgs, curnpgs = 0;
	page_t		*rootpp;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	len = P2ROUNDUP(len, PAGESIZE);

	npages = btop(len);

	hat_flags = HAT_UNLOAD_UNLOCK;
	if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
	    (sptd->spt_flags & SHM_PAGEABLE)) {
		hat_flags = HAT_UNLOAD;
	}

	hat_unload(seg->s_as->a_hat, addr, len, hat_flags);

	amp = sptd->spt_amp;
	if (sptd->spt_flags & SHM_PAGEABLE)
		npages = btop(amp->size);

	ASSERT(amp);
	for (anon_idx = 0; anon_idx < npages; anon_idx++) {
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
			if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
				panic("segspt_free_pages: null app");
				/*NOTREACHED*/
			}
		} else {
			if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx))
			    == NULL)
				continue;
		}
		ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0);
		swap_xlate(ap, &vp, &off);

		/*
		 * If this platform supports HAT_DYNAMIC_ISM_UNMAP,
		 * the pages won't be holding the SE_SHARED lock at this
		 * point.
		 *
		 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
		 * the pages are still held SE_SHARED locked from the
		 * original segspt_create()
		 *
		 * Our goal is to get SE_EXCL lock on each page, remove
		 * permanent lock on it and invalidate the page.
		 */
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
			if (hat_flags == HAT_UNLOAD)
				pp = page_lookup(vp, off, SE_EXCL);
			else {
				if ((pp = page_find(vp, off)) == NULL) {
					panic("segspt_free_pages: "
					    "page not locked");
					/*NOTREACHED*/
				}
				if (!page_tryupgrade(pp)) {
					page_unlock(pp);
					pp = page_lookup(vp, off, SE_EXCL);
				}
			}
			if (pp == NULL) {
				panic("segspt_free_pages: "
				    "page not in the system");
				/*NOTREACHED*/
			}
			page_pp_unlock(pp, 0, 1);
		} else {
			if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
				continue;
			page_pp_unlock(pp, 0, 0);
		}
		/*
		 * It's logical to invalidate the pages here as in most cases
		 * these were created by segspt.
		 */
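		/*
		 * Large pages are torn down one constituent page at a
		 * time: "root" and "curnpgs" below track how far into the
		 * current large page we are, and page_destroy_pages() is
		 * called on the root page only once the last constituent
		 * has been seen.  Small (p_szc == 0) pages are simply
		 * invalidated via VN_DISPOSE().
		 */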
		if (pp->p_szc != 0) {
			/*
			 * For DISM swap is released in shm_rm_amp.
			 */
			if ((sptd->spt_flags & SHM_PAGEABLE) == 0 &&
			    ap->an_pvp != NULL) {
				panic("segspt_free_pages: pvp non NULL");
				/*NOTREACHED*/
			}
			if (root == 0) {
				ASSERT(curnpgs == 0);
				root = 1;
				rootpp = pp;
				pgs = curnpgs = page_get_pagecnt(pp->p_szc);
				ASSERT(pgs > 1);
				ASSERT(IS_P2ALIGNED(pgs, pgs));
				ASSERT(!(page_pptonum(pp) & (pgs - 1)));
				curnpgs--;
			} else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) {
				ASSERT(curnpgs == 1);
				ASSERT(page_pptonum(pp) ==
				    page_pptonum(rootpp) + (pgs - 1));
				page_destroy_pages(rootpp);
				root = 0;
				curnpgs = 0;
			} else {
				ASSERT(curnpgs > 1);
				ASSERT(page_pptonum(pp) ==
				    page_pptonum(rootpp) + (pgs - curnpgs));
				curnpgs--;
			}
		} else {
			if (root != 0 || curnpgs != 0) {
				panic("segspt_free_pages: bad large page");
				/*NOTREACHED*/
			}
			/*LINTED: constant in conditional context */
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		}
	}

	if (root != 0 || curnpgs != 0) {
		panic("segspt_free_pages: bad large page");
		/*NOTREACHED*/
	}

	/*
	 * mark that pages have been released
	 */
	sptd->spt_realsize = 0;

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		atomic_add_long(&spt_used, -npages);
		anon_swap_restore(npages);
	}
}

/*
 * Get memory allocation policy info for specified address in given segment
 */
static lgrp_mem_policy_info_t *
segspt_getpolicy(struct seg *seg, caddr_t addr)
{
	struct anon_map		*amp;
	ulong_t			anon_index;
	lgrp_mem_policy_info_t	*policy_info;
	struct spt_data		*spt_data;

	ASSERT(seg != NULL);

	/*
	 * Get anon_map from segspt
	 *
	 * Assume that no lock needs to be held on anon_map, since
	 * it should be protected by its reference count which must be
	 * nonzero for an existing segment
	 * Need to grab readers lock on policy tree though
	 */
	spt_data = (struct spt_data *)seg->s_data;
	if (spt_data == NULL)
		return (NULL);
	amp = spt_data->spt_amp;
	ASSERT(amp->refcnt != 0);

	/*
	 * Get policy info
	 *
	 * Assume starting anon index of 0
	 */
	anon_index = seg_page(seg, addr);
	policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);

	return (policy_info);
}

/*
 * DISM only.
 * Return locked pages over a given range.
 *
 * We will cache all DISM locked pages and save the pplist for the
 * entire segment in the ppa field of the underlying DISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
 */
/*ARGSUSED*/
static int
segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	struct spt_data *sptd = sptseg->s_data;
	pgcnt_t		pg_idx, npages, tot_npages, npgs;
	struct page	**pplist, **pl, **ppa, *pp;
	struct anon_map	*amp;
	spgcnt_t	an_idx;
	int		ret = ENOTSUP;
	uint_t		pl_built = 0;
	struct anon	*ap;
	struct vnode	*vp;
	u_offset_t	off;
	pgcnt_t		claim_availrmem = 0;
	uint_t		szc;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * We want to lock/unlock the entire ISM segment.  Therefore,
	 * we will be using the underlying sptseg and its base address
	 * and length for the caching arguments.
	 */
	ASSERT(sptseg);
	ASSERT(sptd);

	pg_idx = seg_page(seg, addr);
	npages = btopr(len);

	/*
	 * check if the request is larger than number of pages covered
	 * by amp
	 */
	if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
		*ppp = NULL;
		return (ENOTSUP);
	}

	if (type == L_PAGEUNLOCK) {
		ASSERT(sptd->spt_ppa != NULL);

		seg_pinactive(seg, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, sptd->spt_prot, segspt_reclaim);

		/*
		 * If someone is blocked while unmapping, we purge
		 * segment page cache and thus reclaim pplist synchronously
		 * without waiting for seg_pasync_thread.  This speeds up
		 * unmapping in cases where munmap(2) is called, while
		 * raw async i/o is still in progress or where a thread
		 * exits on data fault in a multithreaded application.
		 */
		if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
			segspt_purge(seg);
		}
		return (0);
	} else if (type == L_PAGERECLAIM) {
		ASSERT(sptd->spt_ppa != NULL);
		(void) segspt_reclaim(seg, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, sptd->spt_prot);
		return (0);
	}

	if (sptd->spt_flags & DISM_PPA_CHANGED) {
		segspt_purge(seg);
		/*
		 * for DISM the ppa needs to be rebuilt since
		 * the number of locked pages could have changed
		 */
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * First try to find pages in segment page cache, without
	 * holding the segment lock.
	 */
	pplist = seg_plookup(seg, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_prot);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa != NULL);
		ASSERT(sptd->spt_ppa == pplist);
		ppa = sptd->spt_ppa;
		for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
			if (ppa[an_idx] == NULL) {
				seg_pinactive(seg, seg->s_base,
				    sptd->spt_amp->size, ppa,
				    sptd->spt_prot, segspt_reclaim);
				*ppp = NULL;
				return (ENOTSUP);
			}
			if ((szc = ppa[an_idx]->p_szc) != 0) {
				npgs = page_get_pagecnt(szc);
				an_idx = P2ROUNDUP(an_idx + 1, npgs);
			} else {
				an_idx++;
			}
		}
		/*
		 * Since we cache the entire DISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. pg_idx.
		 */
		*ppp = &(sptd->spt_ppa[pg_idx]);
		return (0);
	}

	/* The L_PAGELOCK case... */
	mutex_enter(&sptd->spt_lock);
	/*
	 * try to find pages in segment page cache with mutex
	 */
	pplist = seg_plookup(seg, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_prot);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa != NULL);
		ASSERT(sptd->spt_ppa == pplist);
		ppa = sptd->spt_ppa;
		for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
			if (ppa[an_idx] == NULL) {
				mutex_exit(&sptd->spt_lock);
				seg_pinactive(seg, seg->s_base,
				    sptd->spt_amp->size, ppa,
				    sptd->spt_prot, segspt_reclaim);
				*ppp = NULL;
				return (ENOTSUP);
			}
			if ((szc = ppa[an_idx]->p_szc) != 0) {
				npgs = page_get_pagecnt(szc);
				an_idx = P2ROUNDUP(an_idx + 1, npgs);
			} else {
				an_idx++;
			}
		}
		/*
		 * Since we cache the entire DISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. pg_idx.
		 */
		mutex_exit(&sptd->spt_lock);
		*ppp = &(sptd->spt_ppa[pg_idx]);
		return (0);
	}
	if (seg_pinsert_check(seg, sptd->spt_amp->size, SEGP_FORCE_WIRED) ==
	    SEGP_FAIL) {
		mutex_exit(&sptd->spt_lock);
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * No need to worry about protections because DISM pages are always rw.
	 */
	pl = pplist = NULL;
	amp = sptd->spt_amp;

	/*
	 * Do we need to build the ppa array?
	 */
	if (sptd->spt_ppa == NULL) {
		pgcnt_t lpg_cnt = 0;

		pl_built = 1;
		tot_npages = btopr(sptd->spt_amp->size);

		ASSERT(sptd->spt_pcachecnt == 0);
		pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP);
		pl = pplist;

		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
		for (an_idx = 0; an_idx < tot_npages; ) {
			ap = anon_get_ptr(amp->ahp, an_idx);
			/*
			 * Cache only mlocked pages.  For large pages
			 * if one (constituent) page is mlocked
			 * all pages for that large page
			 * are cached also.  This is for quick
			 * lookups of the ppa array.
			 */
			if ((ap != NULL) && (lpg_cnt != 0 ||
			    (sptd->spt_ppa_lckcnt[an_idx] != 0))) {

				swap_xlate(ap, &vp, &off);
				pp = page_lookup(vp, off, SE_SHARED);
				ASSERT(pp != NULL);
				if (lpg_cnt == 0) {
					npgs = page_get_pagecnt(pp->p_szc);
					if (!IS_P2ALIGNED(an_idx, npgs)) {
						an_idx = P2ALIGN(an_idx, npgs);
						page_unlock(pp);
						continue;
					}
				}
				if (++lpg_cnt == npgs)
					lpg_cnt = 0;

				/*
				 * availrmem is decremented only
				 * for unlocked pages
				 */
				if (sptd->spt_ppa_lckcnt[an_idx] == 0)
					claim_availrmem++;
				pplist[an_idx] = pp;
			}
			an_idx++;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);

		mutex_enter(&freemem_lock);
		if (availrmem < tune.t_minarmem + claim_availrmem) {
			mutex_exit(&freemem_lock);
			ret = FC_MAKE_ERR(ENOMEM);
			claim_availrmem = 0;
			goto insert_fail;
		} else {
			availrmem -= claim_availrmem;
		}
		mutex_exit(&freemem_lock);

		sptd->spt_ppa = pl;
	} else {
		/*
		 * We already have a valid ppa[].
		 */
		pl = sptd->spt_ppa;
	}

	ASSERT(pl != NULL);

	ret = seg_pinsert(seg, seg->s_base, sptd->spt_amp->size,
	    pl, sptd->spt_prot, SEGP_FORCE_WIRED | SEGP_ASYNC_FLUSH,
	    segspt_reclaim);
	if (ret == SEGP_FAIL) {
		/*
		 * seg_pinsert failed.  We return
		 * ENOTSUP, so that the as_pagelock() code will
		 * then try the slower F_SOFTLOCK path.
		 */
		sptd->spt_ppa = NULL;
		ret = ENOTSUP;
		goto insert_fail;
	}

	/*
	 * In either case, we increment softlockcnt on the 'real' segment.
	 */
	sptd->spt_pcachecnt++;
	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), 1);

	ppa = sptd->spt_ppa;
	for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
		if (ppa[an_idx] == NULL) {
			mutex_exit(&sptd->spt_lock);
			seg_pinactive(seg, seg->s_base, sptd->spt_amp->size,
			    pl, sptd->spt_prot, segspt_reclaim);
			*ppp = NULL;
			return (ENOTSUP);
		}
		if ((szc = ppa[an_idx]->p_szc) != 0) {
			npgs = page_get_pagecnt(szc);
			an_idx = P2ROUNDUP(an_idx + 1, npgs);
		} else {
			an_idx++;
		}
	}
	/*
	 * We can now drop the sptd->spt_lock since the ppa[]
	 * exists and we have incremented pcachecnt.
	 */
	mutex_exit(&sptd->spt_lock);

	/*
	 * Since we cache the entire segment, we want to
	 * set ppp to point to the first slot that corresponds
	 * to the requested addr, i.e. pg_idx.
	 */
	*ppp = &(sptd->spt_ppa[pg_idx]);
	return (ret);

insert_fail:
	/*
	 * We will only reach this code if we tried and failed.
	 *
	 * And we can drop the lock on the dummy seg, once we've failed
	 * to set up a new ppa[].
	 */
	mutex_exit(&sptd->spt_lock);

	if (pl_built) {
		mutex_enter(&freemem_lock);
		availrmem += claim_availrmem;
		mutex_exit(&freemem_lock);

		/*
		 * We created pl and we need to destroy it.
		 */
		pplist = pl;
		for (an_idx = 0; an_idx < tot_npages; an_idx++) {
			if (pplist[an_idx] != NULL)
				page_unlock(pplist[an_idx]);
		}
		kmem_free(pl, sizeof (page_t *) * tot_npages);
	}

	if (shmd->shm_softlockcnt <= 0) {
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
	*ppp = NULL;
	return (ret);
}


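/*
 * To summarize the pagelock protocol implemented above and below:
 * L_PAGELOCK builds (or reuses) a ppa[] covering the entire underlying
 * spt segment, registers it with the pagelock cache via seg_pinsert(),
 * and hands the caller a pointer into that array at the requested
 * offset; L_PAGEUNLOCK marks the cached list inactive via
 * seg_pinactive(); and L_PAGERECLAIM calls segspt_reclaim() directly
 * to drop the page locks and free the list.  The ISM variant below
 * follows the same scheme, except that every page is known to exist
 * and to be locked already, so no availrmem accounting or per-page
 * lock counts are needed.
 */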
/*
 * return locked pages over a given range.
 *
 * We will cache the entire ISM segment and save the pplist for the
 * entire segment in the ppa field of the underlying ISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
 */
/*ARGSUSED*/
static int
segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	struct spt_data *sptd = sptseg->s_data;
	pgcnt_t		np, page_index, npages;
	caddr_t		a, spt_base;
	struct page	**pplist, **pl, *pp;
	struct anon_map	*amp;
	ulong_t		anon_index;
	int		ret = ENOTSUP;
	uint_t		pl_built = 0;
	struct anon	*ap;
	struct vnode	*vp;
	u_offset_t	off;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * We want to lock/unlock the entire ISM segment.  Therefore,
	 * we will be using the underlying sptseg and its base address
	 * and length for the caching arguments.
	 */
	ASSERT(sptseg);
	ASSERT(sptd);

	if (sptd->spt_flags & SHM_PAGEABLE) {
		return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
	}

	page_index = seg_page(seg, addr);
	npages = btopr(len);

	/*
	 * check if the request is larger than number of pages covered
	 * by amp
	 */
	if (page_index + npages > btopr(sptd->spt_amp->size)) {
		*ppp = NULL;
		return (ENOTSUP);
	}

	if (type == L_PAGEUNLOCK) {

		ASSERT(sptd->spt_ppa != NULL);

		seg_pinactive(seg, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, sptd->spt_prot, segspt_reclaim);

		/*
		 * If someone is blocked while unmapping, we purge
		 * segment page cache and thus reclaim pplist synchronously
		 * without waiting for seg_pasync_thread.  This speeds up
		 * unmapping in cases where munmap(2) is called, while
		 * raw async i/o is still in progress or where a thread
		 * exits on data fault in a multithreaded application.
		 */
		if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
			segspt_purge(seg);
		}
		return (0);
	} else if (type == L_PAGERECLAIM) {
		ASSERT(sptd->spt_ppa != NULL);

		(void) segspt_reclaim(seg, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, sptd->spt_prot);
		return (0);
	}

	/*
	 * First try to find pages in segment page cache, without
	 * holding the segment lock.
	 */
	pplist = seg_plookup(seg, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_prot);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa == pplist);
		ASSERT(sptd->spt_ppa[page_index]);
		/*
		 * Since we cache the entire ISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. page_index.
		 */
		*ppp = &(sptd->spt_ppa[page_index]);
		return (0);
	}

	/* The L_PAGELOCK case... */
	mutex_enter(&sptd->spt_lock);

	/*
	 * try to find pages in segment page cache
	 */
	pplist = seg_plookup(seg, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_prot);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa == pplist);
		/*
		 * Since we cache the entire segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. page_index.
		 */
		mutex_exit(&sptd->spt_lock);
		*ppp = &(sptd->spt_ppa[page_index]);
		return (0);
	}

	if (seg_pinsert_check(seg, sptd->spt_amp->size, SEGP_FORCE_WIRED) ==
	    SEGP_FAIL) {
		mutex_exit(&sptd->spt_lock);
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * No need to worry about protections because ISM pages
	 * are always rw.
	 */
	pl = pplist = NULL;

	/*
	 * Do we need to build the ppa array?
	 */
	 */
	if (sptd->spt_ppa == NULL) {
		ASSERT(sptd->spt_ppa == pplist);

		spt_base = sptseg->s_base;
		pl_built = 1;

		/*
		 * availrmem is decremented once during anon_swap_adjust()
		 * and is incremented during the anon_unresv(), which is
		 * called from shm_rm_amp() when the segment is destroyed.
		 */
		amp = sptd->spt_amp;
		ASSERT(amp != NULL);

		/* pcachecnt is protected by sptd->spt_lock */
		ASSERT(sptd->spt_pcachecnt == 0);
		pplist = kmem_zalloc(sizeof (page_t *)
		    * btopr(sptd->spt_amp->size), KM_SLEEP);
		pl = pplist;

		anon_index = seg_page(sptseg, spt_base);

		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
		for (a = spt_base; a < (spt_base + sptd->spt_amp->size);
		    a += PAGESIZE, anon_index++, pplist++) {
			ap = anon_get_ptr(amp->ahp, anon_index);
			ASSERT(ap != NULL);
			swap_xlate(ap, &vp, &off);
			pp = page_lookup(vp, off, SE_SHARED);
			ASSERT(pp != NULL);
			*pplist = pp;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);

		if (a < (spt_base + sptd->spt_amp->size)) {
			ret = ENOTSUP;
			goto insert_fail;
		}
		sptd->spt_ppa = pl;
	} else {
		/*
		 * We already have a valid ppa[].
		 */
		pl = sptd->spt_ppa;
	}

	ASSERT(pl != NULL);

	ret = seg_pinsert(seg, seg->s_base, sptd->spt_amp->size,
	    pl, sptd->spt_prot, SEGP_FORCE_WIRED, segspt_reclaim);
	if (ret == SEGP_FAIL) {
		/*
		 * seg_pinsert failed.  We return
		 * ENOTSUP, so that the as_pagelock() code will
		 * then try the slower F_SOFTLOCK path.
		 */
		if (pl_built) {
			/*
			 * No one else has referenced the ppa[].
			 * We created it and we need to destroy it.
			 */
			sptd->spt_ppa = NULL;
		}
		ret = ENOTSUP;
		goto insert_fail;
	}

	/*
	 * In either case, we increment softlockcnt on the 'real' segment.
	 */
	sptd->spt_pcachecnt++;
	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), 1);

	/*
	 * We can now drop the sptd->spt_lock since the ppa[]
	 * exists and we have incremented pcachecnt.
	 */
	mutex_exit(&sptd->spt_lock);

	/*
	 * Since we cache the entire segment, we want to
	 * set ppp to point to the first slot that corresponds
	 * to the requested addr, i.e. page_index.
	 */
	*ppp = &(sptd->spt_ppa[page_index]);
	return (ret);

insert_fail:
	/*
	 * We will only reach this code if we tried and failed.
	 *
	 * And we can drop the lock on the dummy seg, once we've failed
	 * to set up a new ppa[].
	 */
	mutex_exit(&sptd->spt_lock);

	if (pl_built) {
		/*
		 * We created pl and we need to destroy it.
		 */
		pplist = pl;
		np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT);
		while (np) {
			page_unlock(*pplist);
			np--;
			pplist++;
		}
		kmem_free(pl, sizeof (page_t *) *
		    btopr(sptd->spt_amp->size));
	}
	if (shmd->shm_softlockcnt <= 0) {
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
	*ppp = NULL;
	return (ret);
}

/*
 * purge any cached pages in the I/O page cache
 */
static void
segspt_purge(struct seg *seg)
{
	seg_ppurge(seg);
}

static int
segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
	enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg;
	struct spt_data *sptd;
	pgcnt_t npages, i, free_availrmem = 0;
	int	done = 0;

#ifdef lint
	addr = addr;
#endif
	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;
	npages = (len >> PAGESHIFT);
	ASSERT(npages);
	ASSERT(sptd->spt_pcachecnt != 0);
	ASSERT(sptd->spt_ppa == pplist);
	ASSERT(npages == btopr(sptd->spt_amp->size));

	/*
	 * Acquire the lock on the dummy seg and destroy the
	 * ppa array IF this is the last pcachecnt.
	 */
	mutex_enter(&sptd->spt_lock);
	if (--sptd->spt_pcachecnt == 0) {
		for (i = 0; i < npages; i++) {
			if (pplist[i] == NULL) {
				continue;
			}
			if (rw == S_WRITE) {
				hat_setrefmod(pplist[i]);
			} else {
				hat_setref(pplist[i]);
			}
			if ((sptd->spt_flags & SHM_PAGEABLE) &&
			    (sptd->spt_ppa_lckcnt[i] == 0))
				free_availrmem++;
			page_unlock(pplist[i]);
		}
		if (sptd->spt_flags & SHM_PAGEABLE) {
			mutex_enter(&freemem_lock);
			availrmem += free_availrmem;
			mutex_exit(&freemem_lock);
		}
		/*
		 * Since we want to cache/uncache the entire ISM segment,
		 * we will track the pplist in a segspt specific field
		 * ppa, that is initialized at the time we add an entry to
		 * the cache.
		 */
		ASSERT(sptd->spt_pcachecnt == 0);
		kmem_free(pplist, sizeof (page_t *) * npages);
		sptd->spt_ppa = NULL;
		sptd->spt_flags &= ~DISM_PPA_CHANGED;
		done = 1;
	}
	mutex_exit(&sptd->spt_lock);
	/*
	 * Now decrement softlockcnt.
	 */
	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -1);

	if (shmd->shm_softlockcnt <= 0) {
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
	return (done);
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 *
 * The calls to acquire and release the anon map lock mutex were
 * removed in order to avoid a deadly embrace during a DR
 * memory delete operation.  (E.g. DR blocks while waiting for an
 * exclusive lock on a page that is being used for kaio; the
 * thread that will complete the kaio and call segspt_softunlock
 * blocks on the anon map lock; another thread holding the anon
 * map lock blocks on another page lock via the segspt_shmfault
 * -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
 *
 * The appropriateness of the removal is based upon the following:
 * 1. If we are holding a segment's reader lock and the page is held
 * shared, then the corresponding element in anonmap which points to
 * anon struct cannot change and there is no need to acquire the
 * anonymous map lock.
 * 2. Threads in segspt_softunlock have a reader lock on the segment
 * and already have the shared page lock, so we are guaranteed that
 * the anon map slot cannot change and therefore can call anon_get_ptr()
 * without grabbing the anonymous map lock.
 * 3. Threads that softlock a shared page break copy-on-write, even if
 * it's a read.  Thus cow faults can be ignored with respect to soft
 * unlocking, since the breaking of cow means that the anon slot(s) will
 * not be shared.
 */
static void
segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
	size_t len, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg;
	struct spt_data *sptd;
	page_t *pp;
	caddr_t adr;
	struct vnode *vp;
	u_offset_t offset;
	ulong_t anon_index;
	struct anon_map *amp;		/* XXX - for locknest */
	struct anon *ap = NULL;
	pgcnt_t npages;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;

	/*
	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
	 * and therefore their pages are SE_SHARED locked
	 * for the entire life of the segment.
	 */
	if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
	    ((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
		goto softlock_decrement;
	}

	/*
	 * Any thread is free to do a page_find and
	 * page_unlock() on the pages within this seg.
	 *
	 * We are already holding the as->a_lock on the user's
	 * real segment, but we need to hold the a_lock on the
	 * underlying dummy as.
This is mostly to satisfy the 1476*0Sstevel@tonic-gate * underlying HAT layer. 1477*0Sstevel@tonic-gate */ 1478*0Sstevel@tonic-gate AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER); 1479*0Sstevel@tonic-gate hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len); 1480*0Sstevel@tonic-gate AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock); 1481*0Sstevel@tonic-gate 1482*0Sstevel@tonic-gate amp = sptd->spt_amp; 1483*0Sstevel@tonic-gate ASSERT(amp != NULL); 1484*0Sstevel@tonic-gate anon_index = seg_page(sptseg, sptseg_addr); 1485*0Sstevel@tonic-gate 1486*0Sstevel@tonic-gate for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) { 1487*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index++); 1488*0Sstevel@tonic-gate ASSERT(ap != NULL); 1489*0Sstevel@tonic-gate swap_xlate(ap, &vp, &offset); 1490*0Sstevel@tonic-gate 1491*0Sstevel@tonic-gate /* 1492*0Sstevel@tonic-gate * Use page_find() instead of page_lookup() to 1493*0Sstevel@tonic-gate * find the page since we know that it has a 1494*0Sstevel@tonic-gate * "shared" lock. 1495*0Sstevel@tonic-gate */ 1496*0Sstevel@tonic-gate pp = page_find(vp, offset); 1497*0Sstevel@tonic-gate ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1)); 1498*0Sstevel@tonic-gate if (pp == NULL) { 1499*0Sstevel@tonic-gate panic("segspt_softunlock: " 1500*0Sstevel@tonic-gate "addr %p, ap %p, vp %p, off %llx", 1501*0Sstevel@tonic-gate (void *)adr, (void *)ap, (void *)vp, offset); 1502*0Sstevel@tonic-gate /*NOTREACHED*/ 1503*0Sstevel@tonic-gate } 1504*0Sstevel@tonic-gate 1505*0Sstevel@tonic-gate if (rw == S_WRITE) { 1506*0Sstevel@tonic-gate hat_setrefmod(pp); 1507*0Sstevel@tonic-gate } else if (rw != S_OTHER) { 1508*0Sstevel@tonic-gate hat_setref(pp); 1509*0Sstevel@tonic-gate } 1510*0Sstevel@tonic-gate page_unlock(pp); 1511*0Sstevel@tonic-gate } 1512*0Sstevel@tonic-gate 1513*0Sstevel@tonic-gate softlock_decrement: 1514*0Sstevel@tonic-gate npages = btopr(len); 1515*0Sstevel@tonic-gate atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages); 1516*0Sstevel@tonic-gate if (shmd->shm_softlockcnt == 0) { 1517*0Sstevel@tonic-gate /* 1518*0Sstevel@tonic-gate * All SOFTLOCKS are gone. Wakeup any waiting 1519*0Sstevel@tonic-gate * unmappers so they can try again to unmap. 1520*0Sstevel@tonic-gate * Check for waiters first without the mutex 1521*0Sstevel@tonic-gate * held so we don't always grab the mutex on 1522*0Sstevel@tonic-gate * softunlocks. 
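		 *
		 * Rough sketch of the other half of this handshake (an
		 * assumption about the as-layer caller, not from this
		 * file): a thread unmapping the segment that sees
		 * softlocked pages backs off roughly as
		 *
		 *	mutex_enter(&as->a_contents);
		 *	AS_SETUNMAPWAIT(as);
		 *	cv_wait(&as->a_cv, &as->a_contents);
		 *	mutex_exit(&as->a_contents);
		 *
		 * and retries, which is why the flag is re-checked under
		 * a_contents before the broadcast below.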
1523*0Sstevel@tonic-gate */ 1524*0Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 1525*0Sstevel@tonic-gate mutex_enter(&seg->s_as->a_contents); 1526*0Sstevel@tonic-gate if (AS_ISUNMAPWAIT(seg->s_as)) { 1527*0Sstevel@tonic-gate AS_CLRUNMAPWAIT(seg->s_as); 1528*0Sstevel@tonic-gate cv_broadcast(&seg->s_as->a_cv); 1529*0Sstevel@tonic-gate } 1530*0Sstevel@tonic-gate mutex_exit(&seg->s_as->a_contents); 1531*0Sstevel@tonic-gate } 1532*0Sstevel@tonic-gate } 1533*0Sstevel@tonic-gate } 1534*0Sstevel@tonic-gate 1535*0Sstevel@tonic-gate int 1536*0Sstevel@tonic-gate segspt_shmattach(struct seg *seg, caddr_t *argsp) 1537*0Sstevel@tonic-gate { 1538*0Sstevel@tonic-gate struct shm_data *shmd_arg = (struct shm_data *)argsp; 1539*0Sstevel@tonic-gate struct shm_data *shmd; 1540*0Sstevel@tonic-gate struct anon_map *shm_amp = shmd_arg->shm_amp; 1541*0Sstevel@tonic-gate struct spt_data *sptd; 1542*0Sstevel@tonic-gate int error = 0; 1543*0Sstevel@tonic-gate 1544*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 1545*0Sstevel@tonic-gate 1546*0Sstevel@tonic-gate shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP); 1547*0Sstevel@tonic-gate if (shmd == NULL) 1548*0Sstevel@tonic-gate return (ENOMEM); 1549*0Sstevel@tonic-gate 1550*0Sstevel@tonic-gate shmd->shm_sptas = shmd_arg->shm_sptas; 1551*0Sstevel@tonic-gate shmd->shm_amp = shm_amp; 1552*0Sstevel@tonic-gate shmd->shm_sptseg = shmd_arg->shm_sptseg; 1553*0Sstevel@tonic-gate 1554*0Sstevel@tonic-gate (void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0, 1555*0Sstevel@tonic-gate NULL, 0, seg->s_size); 1556*0Sstevel@tonic-gate 1557*0Sstevel@tonic-gate seg->s_data = (void *)shmd; 1558*0Sstevel@tonic-gate seg->s_ops = &segspt_shmops; 1559*0Sstevel@tonic-gate seg->s_szc = shmd->shm_sptseg->s_szc; 1560*0Sstevel@tonic-gate sptd = shmd->shm_sptseg->s_data; 1561*0Sstevel@tonic-gate 1562*0Sstevel@tonic-gate if (sptd->spt_flags & SHM_PAGEABLE) { 1563*0Sstevel@tonic-gate if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size), 1564*0Sstevel@tonic-gate KM_NOSLEEP)) == NULL) { 1565*0Sstevel@tonic-gate seg->s_data = (void *)NULL; 1566*0Sstevel@tonic-gate kmem_free(shmd, (sizeof (*shmd))); 1567*0Sstevel@tonic-gate return (ENOMEM); 1568*0Sstevel@tonic-gate } 1569*0Sstevel@tonic-gate shmd->shm_lckpgs = 0; 1570*0Sstevel@tonic-gate if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) { 1571*0Sstevel@tonic-gate if ((error = hat_share(seg->s_as->a_hat, seg->s_base, 1572*0Sstevel@tonic-gate shmd_arg->shm_sptas->a_hat, SEGSPTADDR, 1573*0Sstevel@tonic-gate seg->s_size, seg->s_szc)) != 0) { 1574*0Sstevel@tonic-gate kmem_free(shmd->shm_vpage, 1575*0Sstevel@tonic-gate btopr(shm_amp->size)); 1576*0Sstevel@tonic-gate } 1577*0Sstevel@tonic-gate } 1578*0Sstevel@tonic-gate } else { 1579*0Sstevel@tonic-gate error = hat_share(seg->s_as->a_hat, seg->s_base, 1580*0Sstevel@tonic-gate shmd_arg->shm_sptas->a_hat, SEGSPTADDR, 1581*0Sstevel@tonic-gate seg->s_size, seg->s_szc); 1582*0Sstevel@tonic-gate } 1583*0Sstevel@tonic-gate if (error) { 1584*0Sstevel@tonic-gate seg->s_szc = 0; 1585*0Sstevel@tonic-gate seg->s_data = (void *)NULL; 1586*0Sstevel@tonic-gate kmem_free(shmd, (sizeof (*shmd))); 1587*0Sstevel@tonic-gate } else { 1588*0Sstevel@tonic-gate ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER); 1589*0Sstevel@tonic-gate shm_amp->refcnt++; 1590*0Sstevel@tonic-gate ANON_LOCK_EXIT(&shm_amp->a_rwlock); 1591*0Sstevel@tonic-gate } 1592*0Sstevel@tonic-gate return (error); 1593*0Sstevel@tonic-gate } 1594*0Sstevel@tonic-gate 1595*0Sstevel@tonic-gate int 
1596*0Sstevel@tonic-gate segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize) 1597*0Sstevel@tonic-gate { 1598*0Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 1599*0Sstevel@tonic-gate int reclaim = 1; 1600*0Sstevel@tonic-gate 1601*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 1602*0Sstevel@tonic-gate retry: 1603*0Sstevel@tonic-gate if (shmd->shm_softlockcnt > 0) { 1604*0Sstevel@tonic-gate if (reclaim == 1) { 1605*0Sstevel@tonic-gate segspt_purge(seg); 1606*0Sstevel@tonic-gate reclaim = 0; 1607*0Sstevel@tonic-gate goto retry; 1608*0Sstevel@tonic-gate } 1609*0Sstevel@tonic-gate return (EAGAIN); 1610*0Sstevel@tonic-gate } 1611*0Sstevel@tonic-gate 1612*0Sstevel@tonic-gate if (ssize != seg->s_size) { 1613*0Sstevel@tonic-gate #ifdef DEBUG 1614*0Sstevel@tonic-gate cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n", 1615*0Sstevel@tonic-gate ssize, seg->s_size); 1616*0Sstevel@tonic-gate #endif 1617*0Sstevel@tonic-gate return (EINVAL); 1618*0Sstevel@tonic-gate } 1619*0Sstevel@tonic-gate 1620*0Sstevel@tonic-gate (void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK, 1621*0Sstevel@tonic-gate NULL, 0); 1622*0Sstevel@tonic-gate hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc); 1623*0Sstevel@tonic-gate 1624*0Sstevel@tonic-gate seg_free(seg); 1625*0Sstevel@tonic-gate 1626*0Sstevel@tonic-gate return (0); 1627*0Sstevel@tonic-gate } 1628*0Sstevel@tonic-gate 1629*0Sstevel@tonic-gate void 1630*0Sstevel@tonic-gate segspt_shmfree(struct seg *seg) 1631*0Sstevel@tonic-gate { 1632*0Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 1633*0Sstevel@tonic-gate struct anon_map *shm_amp = shmd->shm_amp; 1634*0Sstevel@tonic-gate 1635*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 1636*0Sstevel@tonic-gate 1637*0Sstevel@tonic-gate (void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0, 1638*0Sstevel@tonic-gate MC_UNLOCK, NULL, 0); 1639*0Sstevel@tonic-gate 1640*0Sstevel@tonic-gate /* 1641*0Sstevel@tonic-gate * Need to increment refcnt when attaching 1642*0Sstevel@tonic-gate * and decrement when detaching because of dup(). 1643*0Sstevel@tonic-gate */ 1644*0Sstevel@tonic-gate ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER); 1645*0Sstevel@tonic-gate shm_amp->refcnt--; 1646*0Sstevel@tonic-gate ANON_LOCK_EXIT(&shm_amp->a_rwlock); 1647*0Sstevel@tonic-gate 1648*0Sstevel@tonic-gate if (shmd->shm_vpage) { /* only for DISM */ 1649*0Sstevel@tonic-gate kmem_free(shmd->shm_vpage, btopr(shm_amp->size)); 1650*0Sstevel@tonic-gate shmd->shm_vpage = NULL; 1651*0Sstevel@tonic-gate } 1652*0Sstevel@tonic-gate kmem_free(shmd, sizeof (*shmd)); 1653*0Sstevel@tonic-gate } 1654*0Sstevel@tonic-gate 1655*0Sstevel@tonic-gate /*ARGSUSED*/ 1656*0Sstevel@tonic-gate int 1657*0Sstevel@tonic-gate segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 1658*0Sstevel@tonic-gate { 1659*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 1660*0Sstevel@tonic-gate 1661*0Sstevel@tonic-gate /* 1662*0Sstevel@tonic-gate * Shared page table is more than shared mapping. 1663*0Sstevel@tonic-gate * Individual process sharing page tables can't change prot 1664*0Sstevel@tonic-gate * because there is only one set of page tables. 1665*0Sstevel@tonic-gate * This will be allowed after private page table is 1666*0Sstevel@tonic-gate * supported. 1667*0Sstevel@tonic-gate */ 1668*0Sstevel@tonic-gate /* need to return correct status error? 
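	 *
	 * Illustrative consequence (editor's assumption about the usual
	 * as_setprot() path, not stated here): a user-level
	 *
	 *	mprotect(ism_addr, len, PROT_READ);
	 *
	 * over an ISM mapping is accepted but changes nothing, since this
	 * handler unconditionally returns 0.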
	 */
	return (0);
}


faultcode_t
segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
	size_t len, enum fault_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg *sptseg = shmd->shm_sptseg;
	struct as *curspt = shmd->shm_sptas;
	struct spt_data *sptd = sptseg->s_data;
	pgcnt_t npages;
	size_t share_sz, size;
	caddr_t segspt_addr, shm_addr;
	page_t **ppa;
	int	i;
	ulong_t an_idx = 0;
	int	err = 0;

#ifdef lint
	hat = hat;
#endif
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * Because of the way spt is implemented
	 * the realsize of the segment does not have to be
	 * equal to the segment size itself.  The segment size is
	 * often in multiples of a page size larger than PAGESIZE.
	 * The realsize is rounded up to the nearest PAGESIZE
	 * based on what the user requested.  This is a bit of
	 * ugliness that is historical but not easily fixed
	 * without re-designing the higher levels of ISM.
	 */
	ASSERT(addr >= seg->s_base);
	if (((addr + len) - seg->s_base) > sptd->spt_realsize)
		return (FC_NOMAP);
	/*
	 * For all of the following cases except F_PROT, we need to
	 * make any necessary adjustments to addr and len
	 * and get all of the necessary page_t's into an array called ppa[].
	 *
	 * The code in shmat() forces base addr and len of ISM segment
	 * to be aligned to largest page size supported.  Therefore,
	 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
	 * pagesize" chunks.  We want to make sure that we HAT_LOAD_LOCK
	 * in large pagesize chunks, or else we will screw up the HAT
	 * layer by calling hat_memload_array() with differing page sizes
	 * over a given virtual range.
	 */
	share_sz = page_get_pagesize(sptseg->s_szc);
	shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
	size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), share_sz);
	npages = btopr(size);

	/*
	 * Now we need to convert from addr in segshm to addr in segspt.
1727*0Sstevel@tonic-gate */ 1728*0Sstevel@tonic-gate an_idx = seg_page(seg, shm_addr); 1729*0Sstevel@tonic-gate segspt_addr = sptseg->s_base + ptob(an_idx); 1730*0Sstevel@tonic-gate 1731*0Sstevel@tonic-gate ASSERT((segspt_addr + ptob(npages)) <= 1732*0Sstevel@tonic-gate (sptseg->s_base + sptd->spt_realsize)); 1733*0Sstevel@tonic-gate ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size)); 1734*0Sstevel@tonic-gate 1735*0Sstevel@tonic-gate switch (type) { 1736*0Sstevel@tonic-gate 1737*0Sstevel@tonic-gate case F_SOFTLOCK: 1738*0Sstevel@tonic-gate 1739*0Sstevel@tonic-gate mutex_enter(&freemem_lock); 1740*0Sstevel@tonic-gate if (availrmem < tune.t_minarmem + npages) { 1741*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 1742*0Sstevel@tonic-gate return (FC_MAKE_ERR(ENOMEM)); 1743*0Sstevel@tonic-gate } else { 1744*0Sstevel@tonic-gate availrmem -= npages; 1745*0Sstevel@tonic-gate } 1746*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 1747*0Sstevel@tonic-gate atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages); 1748*0Sstevel@tonic-gate /* 1749*0Sstevel@tonic-gate * Fall through to the F_INVAL case to load up the hat layer 1750*0Sstevel@tonic-gate * entries with the HAT_LOAD_LOCK flag. 1751*0Sstevel@tonic-gate */ 1752*0Sstevel@tonic-gate /* FALLTHRU */ 1753*0Sstevel@tonic-gate case F_INVAL: 1754*0Sstevel@tonic-gate 1755*0Sstevel@tonic-gate if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC)) 1756*0Sstevel@tonic-gate return (FC_NOMAP); 1757*0Sstevel@tonic-gate 1758*0Sstevel@tonic-gate ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP); 1759*0Sstevel@tonic-gate 1760*0Sstevel@tonic-gate err = spt_anon_getpages(sptseg, segspt_addr, size, ppa); 1761*0Sstevel@tonic-gate if (err != 0) { 1762*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 1763*0Sstevel@tonic-gate mutex_enter(&freemem_lock); 1764*0Sstevel@tonic-gate availrmem += npages; 1765*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 1766*0Sstevel@tonic-gate atomic_add_long((ulong_t *)( 1767*0Sstevel@tonic-gate &(shmd->shm_softlockcnt)), -npages); 1768*0Sstevel@tonic-gate } 1769*0Sstevel@tonic-gate goto dism_err; 1770*0Sstevel@tonic-gate } 1771*0Sstevel@tonic-gate AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER); 1772*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 1773*0Sstevel@tonic-gate 1774*0Sstevel@tonic-gate /* 1775*0Sstevel@tonic-gate * Load up the translation keeping it 1776*0Sstevel@tonic-gate * locked and don't unlock the page. 1777*0Sstevel@tonic-gate */ 1778*0Sstevel@tonic-gate hat_memload_array(sptseg->s_as->a_hat, segspt_addr, 1779*0Sstevel@tonic-gate size, ppa, sptd->spt_prot, 1780*0Sstevel@tonic-gate HAT_LOAD_LOCK | HAT_LOAD_SHARE); 1781*0Sstevel@tonic-gate } else { 1782*0Sstevel@tonic-gate if (hat == seg->s_as->a_hat) { 1783*0Sstevel@tonic-gate 1784*0Sstevel@tonic-gate /* 1785*0Sstevel@tonic-gate * Migrate pages marked for migration 1786*0Sstevel@tonic-gate */ 1787*0Sstevel@tonic-gate if (lgrp_optimizations()) 1788*0Sstevel@tonic-gate page_migrate(seg, shm_addr, ppa, 1789*0Sstevel@tonic-gate npages); 1790*0Sstevel@tonic-gate 1791*0Sstevel@tonic-gate /* CPU HAT */ 1792*0Sstevel@tonic-gate hat_memload_array(sptseg->s_as->a_hat, 1793*0Sstevel@tonic-gate segspt_addr, size, ppa, sptd->spt_prot, 1794*0Sstevel@tonic-gate HAT_LOAD_SHARE); 1795*0Sstevel@tonic-gate } else { 1796*0Sstevel@tonic-gate /* XHAT. 
Pass real address */ 1797*0Sstevel@tonic-gate hat_memload_array(hat, shm_addr, 1798*0Sstevel@tonic-gate size, ppa, sptd->spt_prot, HAT_LOAD_SHARE); 1799*0Sstevel@tonic-gate } 1800*0Sstevel@tonic-gate 1801*0Sstevel@tonic-gate /* 1802*0Sstevel@tonic-gate * And now drop the SE_SHARED lock(s). 1803*0Sstevel@tonic-gate */ 1804*0Sstevel@tonic-gate for (i = 0; i < npages; i++) 1805*0Sstevel@tonic-gate page_unlock(ppa[i]); 1806*0Sstevel@tonic-gate } 1807*0Sstevel@tonic-gate 1808*0Sstevel@tonic-gate if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) { 1809*0Sstevel@tonic-gate if (hat_share(seg->s_as->a_hat, shm_addr, 1810*0Sstevel@tonic-gate curspt->a_hat, segspt_addr, ptob(npages), 1811*0Sstevel@tonic-gate seg->s_szc) != 0) { 1812*0Sstevel@tonic-gate panic("hat_share err in DISM fault"); 1813*0Sstevel@tonic-gate /* NOTREACHED */ 1814*0Sstevel@tonic-gate } 1815*0Sstevel@tonic-gate } 1816*0Sstevel@tonic-gate AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock); 1817*0Sstevel@tonic-gate dism_err: 1818*0Sstevel@tonic-gate kmem_free(ppa, npages * sizeof (page_t *)); 1819*0Sstevel@tonic-gate return (err); 1820*0Sstevel@tonic-gate 1821*0Sstevel@tonic-gate case F_SOFTUNLOCK: 1822*0Sstevel@tonic-gate 1823*0Sstevel@tonic-gate mutex_enter(&freemem_lock); 1824*0Sstevel@tonic-gate availrmem += npages; 1825*0Sstevel@tonic-gate mutex_exit(&freemem_lock); 1826*0Sstevel@tonic-gate 1827*0Sstevel@tonic-gate /* 1828*0Sstevel@tonic-gate * This is a bit ugly, we pass in the real seg pointer, 1829*0Sstevel@tonic-gate * but the segspt_addr is the virtual address within the 1830*0Sstevel@tonic-gate * dummy seg. 1831*0Sstevel@tonic-gate */ 1832*0Sstevel@tonic-gate segspt_softunlock(seg, segspt_addr, size, rw); 1833*0Sstevel@tonic-gate return (0); 1834*0Sstevel@tonic-gate 1835*0Sstevel@tonic-gate case F_PROT: 1836*0Sstevel@tonic-gate 1837*0Sstevel@tonic-gate /* 1838*0Sstevel@tonic-gate * This takes care of the unusual case where a user 1839*0Sstevel@tonic-gate * allocates a stack in shared memory and a register 1840*0Sstevel@tonic-gate * window overflow is written to that stack page before 1841*0Sstevel@tonic-gate * it is otherwise modified. 1842*0Sstevel@tonic-gate * 1843*0Sstevel@tonic-gate * We can get away with this because ISM segments are 1844*0Sstevel@tonic-gate * always rw. Other than this unusual case, there 1845*0Sstevel@tonic-gate * should be no instances of protection violations. 
		 */
		return (0);

	default:
#ifdef DEBUG
		panic("segspt_dismfault default type?");
#else
		return (FC_NOMAP);
#endif
	}
}


faultcode_t
segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
	size_t len, enum fault_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg *sptseg = shmd->shm_sptseg;
	struct as *curspt = shmd->shm_sptas;
	struct spt_data *sptd = sptseg->s_data;
	pgcnt_t npages;
	size_t share_size, size;
	caddr_t sptseg_addr, shm_addr;
	page_t *pp, **ppa;
	int	i;
	u_offset_t offset;
	ulong_t anon_index = 0;
	struct vnode *vp;
	struct anon_map *amp;		/* XXX - for locknest */
	struct anon *ap = NULL;
	anon_sync_obj_t cookie;

#ifdef lint
	hat = hat;
#endif

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (sptd->spt_flags & SHM_PAGEABLE) {
		return (segspt_dismfault(hat, seg, addr, len, type, rw));
	}

	/*
	 * Because of the way spt is implemented
	 * the realsize of the segment does not have to be
	 * equal to the segment size itself.  The segment size is
	 * often in multiples of a page size larger than PAGESIZE.
	 * The realsize is rounded up to the nearest PAGESIZE
	 * based on what the user requested.  This is a bit of
	 * ugliness that is historical but not easily fixed
	 * without re-designing the higher levels of ISM.
	 */
	ASSERT(addr >= seg->s_base);
	if (((addr + len) - seg->s_base) > sptd->spt_realsize)
		return (FC_NOMAP);
	/*
	 * For all of the following cases except F_PROT, we need to
	 * make any necessary adjustments to addr and len
	 * and get all of the necessary page_t's into an array called ppa[].
	 *
	 * The code in shmat() forces base addr and len of ISM segment
	 * to be aligned to largest page size supported.  Therefore,
	 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
	 * pagesize" chunks.
We want to make sure that we HAT_LOAD_LOCK 1911*0Sstevel@tonic-gate * in large pagesize chunks, or else we will screw up the HAT 1912*0Sstevel@tonic-gate * layer by calling hat_memload_array() with differing page sizes 1913*0Sstevel@tonic-gate * over a given virtual range. 1914*0Sstevel@tonic-gate */ 1915*0Sstevel@tonic-gate share_size = page_get_pagesize(sptseg->s_szc); 1916*0Sstevel@tonic-gate shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size); 1917*0Sstevel@tonic-gate size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), share_size); 1918*0Sstevel@tonic-gate npages = btopr(size); 1919*0Sstevel@tonic-gate 1920*0Sstevel@tonic-gate /* 1921*0Sstevel@tonic-gate * Now we need to convert from addr in segshm to addr in segspt. 1922*0Sstevel@tonic-gate */ 1923*0Sstevel@tonic-gate anon_index = seg_page(seg, shm_addr); 1924*0Sstevel@tonic-gate sptseg_addr = sptseg->s_base + ptob(anon_index); 1925*0Sstevel@tonic-gate 1926*0Sstevel@tonic-gate /* 1927*0Sstevel@tonic-gate * And now we may have to adjust npages downward if we have 1928*0Sstevel@tonic-gate * exceeded the realsize of the segment or initial anon 1929*0Sstevel@tonic-gate * allocations. 1930*0Sstevel@tonic-gate */ 1931*0Sstevel@tonic-gate if ((sptseg_addr + ptob(npages)) > 1932*0Sstevel@tonic-gate (sptseg->s_base + sptd->spt_realsize)) 1933*0Sstevel@tonic-gate size = (sptseg->s_base + sptd->spt_realsize) - sptseg_addr; 1934*0Sstevel@tonic-gate 1935*0Sstevel@tonic-gate npages = btopr(size); 1936*0Sstevel@tonic-gate 1937*0Sstevel@tonic-gate ASSERT(sptseg_addr < (sptseg->s_base + sptseg->s_size)); 1938*0Sstevel@tonic-gate ASSERT((sptd->spt_flags & SHM_PAGEABLE) == 0); 1939*0Sstevel@tonic-gate 1940*0Sstevel@tonic-gate switch (type) { 1941*0Sstevel@tonic-gate 1942*0Sstevel@tonic-gate case F_SOFTLOCK: 1943*0Sstevel@tonic-gate 1944*0Sstevel@tonic-gate /* 1945*0Sstevel@tonic-gate * availrmem is decremented once during anon_swap_adjust() 1946*0Sstevel@tonic-gate * and is incremented during the anon_unresv(), which is 1947*0Sstevel@tonic-gate * called from shm_rm_amp() when the segment is destroyed. 1948*0Sstevel@tonic-gate */ 1949*0Sstevel@tonic-gate atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages); 1950*0Sstevel@tonic-gate /* 1951*0Sstevel@tonic-gate * Some platforms assume that ISM pages are SE_SHARED 1952*0Sstevel@tonic-gate * locked for the entire life of the segment. 1953*0Sstevel@tonic-gate */ 1954*0Sstevel@tonic-gate if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) 1955*0Sstevel@tonic-gate return (0); 1956*0Sstevel@tonic-gate /* 1957*0Sstevel@tonic-gate * Fall through to the F_INVAL case to load up the hat layer 1958*0Sstevel@tonic-gate * entries with the HAT_LOAD_LOCK flag. 1959*0Sstevel@tonic-gate */ 1960*0Sstevel@tonic-gate 1961*0Sstevel@tonic-gate /* FALLTHRU */ 1962*0Sstevel@tonic-gate case F_INVAL: 1963*0Sstevel@tonic-gate 1964*0Sstevel@tonic-gate if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC)) 1965*0Sstevel@tonic-gate return (FC_NOMAP); 1966*0Sstevel@tonic-gate 1967*0Sstevel@tonic-gate /* 1968*0Sstevel@tonic-gate * Some platforms that do NOT support DYNAMIC_ISM_UNMAP 1969*0Sstevel@tonic-gate * may still rely on this call to hat_share(). That 1970*0Sstevel@tonic-gate * would imply that those hat's can fault on a 1971*0Sstevel@tonic-gate * HAT_LOAD_LOCK translation, which would seem 1972*0Sstevel@tonic-gate * contradictory. 
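		 *
		 * Rough summary of the two strategies used below (sketch
		 * only, not additional code):
		 *
		 *	if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
		 *		hat_share(...);	(share whole spt mappings)
		 *	else
		 *		build ppa[] and hat_memload_array(...);
		 *				(load just this range)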
1973*0Sstevel@tonic-gate */ 1974*0Sstevel@tonic-gate if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) { 1975*0Sstevel@tonic-gate if (hat_share(seg->s_as->a_hat, seg->s_base, 1976*0Sstevel@tonic-gate curspt->a_hat, sptseg->s_base, 1977*0Sstevel@tonic-gate sptseg->s_size, sptseg->s_szc) != 0) { 1978*0Sstevel@tonic-gate panic("hat_share error in ISM fault"); 1979*0Sstevel@tonic-gate /*NOTREACHED*/ 1980*0Sstevel@tonic-gate } 1981*0Sstevel@tonic-gate return (0); 1982*0Sstevel@tonic-gate } 1983*0Sstevel@tonic-gate ppa = kmem_zalloc(sizeof (page_t *) * npages, KM_SLEEP); 1984*0Sstevel@tonic-gate 1985*0Sstevel@tonic-gate /* 1986*0Sstevel@tonic-gate * I see no need to lock the real seg, 1987*0Sstevel@tonic-gate * here, because all of our work will be on the underlying 1988*0Sstevel@tonic-gate * dummy seg. 1989*0Sstevel@tonic-gate * 1990*0Sstevel@tonic-gate * sptseg_addr and npages now account for large pages. 1991*0Sstevel@tonic-gate */ 1992*0Sstevel@tonic-gate amp = sptd->spt_amp; 1993*0Sstevel@tonic-gate ASSERT(amp != NULL); 1994*0Sstevel@tonic-gate anon_index = seg_page(sptseg, sptseg_addr); 1995*0Sstevel@tonic-gate 1996*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 1997*0Sstevel@tonic-gate for (i = 0; i < npages; i++) { 1998*0Sstevel@tonic-gate anon_array_enter(amp, anon_index, &cookie); 1999*0Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, anon_index++); 2000*0Sstevel@tonic-gate ASSERT(ap != NULL); 2001*0Sstevel@tonic-gate swap_xlate(ap, &vp, &offset); 2002*0Sstevel@tonic-gate anon_array_exit(&cookie); 2003*0Sstevel@tonic-gate pp = page_lookup(vp, offset, SE_SHARED); 2004*0Sstevel@tonic-gate ASSERT(pp != NULL); 2005*0Sstevel@tonic-gate ppa[i] = pp; 2006*0Sstevel@tonic-gate } 2007*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 2008*0Sstevel@tonic-gate ASSERT(i == npages); 2009*0Sstevel@tonic-gate 2010*0Sstevel@tonic-gate /* 2011*0Sstevel@tonic-gate * We are already holding the as->a_lock on the user's 2012*0Sstevel@tonic-gate * real segment, but we need to hold the a_lock on the 2013*0Sstevel@tonic-gate * underlying dummy as. This is mostly to satisfy the 2014*0Sstevel@tonic-gate * underlying HAT layer. 2015*0Sstevel@tonic-gate */ 2016*0Sstevel@tonic-gate AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER); 2017*0Sstevel@tonic-gate if (type == F_SOFTLOCK) { 2018*0Sstevel@tonic-gate /* 2019*0Sstevel@tonic-gate * Load up the translation keeping it 2020*0Sstevel@tonic-gate * locked and don't unlock the page. 2021*0Sstevel@tonic-gate */ 2022*0Sstevel@tonic-gate hat_memload_array(sptseg->s_as->a_hat, sptseg_addr, 2023*0Sstevel@tonic-gate ptob(npages), ppa, sptd->spt_prot, 2024*0Sstevel@tonic-gate HAT_LOAD_LOCK | HAT_LOAD_SHARE); 2025*0Sstevel@tonic-gate } else { 2026*0Sstevel@tonic-gate if (hat == seg->s_as->a_hat) { 2027*0Sstevel@tonic-gate 2028*0Sstevel@tonic-gate /* 2029*0Sstevel@tonic-gate * Migrate pages marked for migration. 2030*0Sstevel@tonic-gate */ 2031*0Sstevel@tonic-gate if (lgrp_optimizations()) 2032*0Sstevel@tonic-gate page_migrate(seg, shm_addr, ppa, 2033*0Sstevel@tonic-gate npages); 2034*0Sstevel@tonic-gate 2035*0Sstevel@tonic-gate /* CPU HAT */ 2036*0Sstevel@tonic-gate hat_memload_array(sptseg->s_as->a_hat, 2037*0Sstevel@tonic-gate sptseg_addr, ptob(npages), ppa, 2038*0Sstevel@tonic-gate sptd->spt_prot, HAT_LOAD_SHARE); 2039*0Sstevel@tonic-gate } else { 2040*0Sstevel@tonic-gate /* XHAT. 
Pass real address */ 2041*0Sstevel@tonic-gate hat_memload_array(hat, shm_addr, 2042*0Sstevel@tonic-gate ptob(npages), ppa, sptd->spt_prot, 2043*0Sstevel@tonic-gate HAT_LOAD_SHARE); 2044*0Sstevel@tonic-gate } 2045*0Sstevel@tonic-gate 2046*0Sstevel@tonic-gate /* 2047*0Sstevel@tonic-gate * And now drop the SE_SHARED lock(s). 2048*0Sstevel@tonic-gate */ 2049*0Sstevel@tonic-gate for (i = 0; i < npages; i++) 2050*0Sstevel@tonic-gate page_unlock(ppa[i]); 2051*0Sstevel@tonic-gate } 2052*0Sstevel@tonic-gate AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock); 2053*0Sstevel@tonic-gate 2054*0Sstevel@tonic-gate kmem_free(ppa, sizeof (page_t *) * npages); 2055*0Sstevel@tonic-gate return (0); 2056*0Sstevel@tonic-gate case F_SOFTUNLOCK: 2057*0Sstevel@tonic-gate 2058*0Sstevel@tonic-gate /* 2059*0Sstevel@tonic-gate * This is a bit ugly, we pass in the real seg pointer, 2060*0Sstevel@tonic-gate * but the sptseg_addr is the virtual address within the 2061*0Sstevel@tonic-gate * dummy seg. 2062*0Sstevel@tonic-gate */ 2063*0Sstevel@tonic-gate segspt_softunlock(seg, sptseg_addr, ptob(npages), rw); 2064*0Sstevel@tonic-gate return (0); 2065*0Sstevel@tonic-gate 2066*0Sstevel@tonic-gate case F_PROT: 2067*0Sstevel@tonic-gate 2068*0Sstevel@tonic-gate /* 2069*0Sstevel@tonic-gate * This takes care of the unusual case where a user 2070*0Sstevel@tonic-gate * allocates a stack in shared memory and a register 2071*0Sstevel@tonic-gate * window overflow is written to that stack page before 2072*0Sstevel@tonic-gate * it is otherwise modified. 2073*0Sstevel@tonic-gate * 2074*0Sstevel@tonic-gate * We can get away with this because ISM segments are 2075*0Sstevel@tonic-gate * always rw. Other than this unusual case, there 2076*0Sstevel@tonic-gate * should be no instances of protection violations. 
2077*0Sstevel@tonic-gate */ 2078*0Sstevel@tonic-gate return (0); 2079*0Sstevel@tonic-gate 2080*0Sstevel@tonic-gate default: 2081*0Sstevel@tonic-gate #ifdef DEBUG 2082*0Sstevel@tonic-gate cmn_err(CE_WARN, "segspt_shmfault default type?"); 2083*0Sstevel@tonic-gate #endif 2084*0Sstevel@tonic-gate return (FC_NOMAP); 2085*0Sstevel@tonic-gate } 2086*0Sstevel@tonic-gate } 2087*0Sstevel@tonic-gate 2088*0Sstevel@tonic-gate /*ARGSUSED*/ 2089*0Sstevel@tonic-gate static faultcode_t 2090*0Sstevel@tonic-gate segspt_shmfaulta(struct seg *seg, caddr_t addr) 2091*0Sstevel@tonic-gate { 2092*0Sstevel@tonic-gate return (0); 2093*0Sstevel@tonic-gate } 2094*0Sstevel@tonic-gate 2095*0Sstevel@tonic-gate /*ARGSUSED*/ 2096*0Sstevel@tonic-gate static int 2097*0Sstevel@tonic-gate segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta) 2098*0Sstevel@tonic-gate { 2099*0Sstevel@tonic-gate return (0); 2100*0Sstevel@tonic-gate } 2101*0Sstevel@tonic-gate 2102*0Sstevel@tonic-gate /*ARGSUSED*/ 2103*0Sstevel@tonic-gate static size_t 2104*0Sstevel@tonic-gate segspt_shmswapout(struct seg *seg) 2105*0Sstevel@tonic-gate { 2106*0Sstevel@tonic-gate return (0); 2107*0Sstevel@tonic-gate } 2108*0Sstevel@tonic-gate 2109*0Sstevel@tonic-gate /* 2110*0Sstevel@tonic-gate * duplicate the shared page tables 2111*0Sstevel@tonic-gate */ 2112*0Sstevel@tonic-gate int 2113*0Sstevel@tonic-gate segspt_shmdup(struct seg *seg, struct seg *newseg) 2114*0Sstevel@tonic-gate { 2115*0Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 2116*0Sstevel@tonic-gate struct anon_map *amp = shmd->shm_amp; 2117*0Sstevel@tonic-gate struct shm_data *shmd_new; 2118*0Sstevel@tonic-gate struct seg *spt_seg = shmd->shm_sptseg; 2119*0Sstevel@tonic-gate struct spt_data *sptd = spt_seg->s_data; 2120*0Sstevel@tonic-gate 2121*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock)); 2122*0Sstevel@tonic-gate 2123*0Sstevel@tonic-gate shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP); 2124*0Sstevel@tonic-gate newseg->s_data = (void *)shmd_new; 2125*0Sstevel@tonic-gate shmd_new->shm_sptas = shmd->shm_sptas; 2126*0Sstevel@tonic-gate shmd_new->shm_amp = amp; 2127*0Sstevel@tonic-gate shmd_new->shm_sptseg = shmd->shm_sptseg; 2128*0Sstevel@tonic-gate newseg->s_ops = &segspt_shmops; 2129*0Sstevel@tonic-gate newseg->s_szc = seg->s_szc; 2130*0Sstevel@tonic-gate ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc); 2131*0Sstevel@tonic-gate 2132*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 2133*0Sstevel@tonic-gate amp->refcnt++; 2134*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 2135*0Sstevel@tonic-gate 2136*0Sstevel@tonic-gate if (sptd->spt_flags & SHM_PAGEABLE) { 2137*0Sstevel@tonic-gate shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP); 2138*0Sstevel@tonic-gate shmd_new->shm_lckpgs = 0; 2139*0Sstevel@tonic-gate } 2140*0Sstevel@tonic-gate return (hat_share(newseg->s_as->a_hat, newseg->s_base, 2141*0Sstevel@tonic-gate shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size, seg->s_szc)); 2142*0Sstevel@tonic-gate } 2143*0Sstevel@tonic-gate 2144*0Sstevel@tonic-gate /*ARGSUSED*/ 2145*0Sstevel@tonic-gate int 2146*0Sstevel@tonic-gate segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot) 2147*0Sstevel@tonic-gate { 2148*0Sstevel@tonic-gate struct shm_data *shmd = (struct shm_data *)seg->s_data; 2149*0Sstevel@tonic-gate struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data; 2150*0Sstevel@tonic-gate 2151*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, 
&seg->s_as->a_lock)); 2152*0Sstevel@tonic-gate 2153*0Sstevel@tonic-gate /* 2154*0Sstevel@tonic-gate * ISM segment is always rw. 2155*0Sstevel@tonic-gate */ 2156*0Sstevel@tonic-gate return (((sptd->spt_prot & prot) != prot) ? EACCES : 0); 2157*0Sstevel@tonic-gate } 2158*0Sstevel@tonic-gate 2159*0Sstevel@tonic-gate /* 2160*0Sstevel@tonic-gate * Return an array of locked large pages, for empty slots allocate 2161*0Sstevel@tonic-gate * private zero-filled anon pages. 2162*0Sstevel@tonic-gate */ 2163*0Sstevel@tonic-gate static int 2164*0Sstevel@tonic-gate spt_anon_getpages( 2165*0Sstevel@tonic-gate struct seg *sptseg, 2166*0Sstevel@tonic-gate caddr_t sptaddr, 2167*0Sstevel@tonic-gate size_t len, 2168*0Sstevel@tonic-gate page_t *ppa[]) 2169*0Sstevel@tonic-gate { 2170*0Sstevel@tonic-gate struct spt_data *sptd = sptseg->s_data; 2171*0Sstevel@tonic-gate struct anon_map *amp = sptd->spt_amp; 2172*0Sstevel@tonic-gate enum seg_rw rw = sptd->spt_prot; 2173*0Sstevel@tonic-gate uint_t szc = sptseg->s_szc; 2174*0Sstevel@tonic-gate size_t pg_sz, share_sz = page_get_pagesize(szc); 2175*0Sstevel@tonic-gate pgcnt_t lp_npgs; 2176*0Sstevel@tonic-gate caddr_t lp_addr, e_sptaddr; 2177*0Sstevel@tonic-gate uint_t vpprot, ppa_szc = 0; 2178*0Sstevel@tonic-gate struct vpage *vpage = NULL; 2179*0Sstevel@tonic-gate ulong_t j, ppa_idx; 2180*0Sstevel@tonic-gate int err, ierr = 0; 2181*0Sstevel@tonic-gate pgcnt_t an_idx; 2182*0Sstevel@tonic-gate anon_sync_obj_t cookie; 2183*0Sstevel@tonic-gate 2184*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz)); 2185*0Sstevel@tonic-gate ASSERT(len != 0); 2186*0Sstevel@tonic-gate 2187*0Sstevel@tonic-gate pg_sz = share_sz; 2188*0Sstevel@tonic-gate lp_npgs = btop(pg_sz); 2189*0Sstevel@tonic-gate lp_addr = sptaddr; 2190*0Sstevel@tonic-gate e_sptaddr = sptaddr + len; 2191*0Sstevel@tonic-gate an_idx = seg_page(sptseg, sptaddr); 2192*0Sstevel@tonic-gate ppa_idx = 0; 2193*0Sstevel@tonic-gate 2194*0Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_READER); 2195*0Sstevel@tonic-gate /*CONSTCOND*/ 2196*0Sstevel@tonic-gate while (1) { 2197*0Sstevel@tonic-gate for (; lp_addr < e_sptaddr; 2198*0Sstevel@tonic-gate an_idx += lp_npgs, lp_addr += pg_sz, 2199*0Sstevel@tonic-gate ppa_idx += lp_npgs) { 2200*0Sstevel@tonic-gate 2201*0Sstevel@tonic-gate anon_array_enter(amp, an_idx, &cookie); 2202*0Sstevel@tonic-gate ppa_szc = (uint_t)-1; 2203*0Sstevel@tonic-gate ierr = anon_map_getpages(amp, an_idx, szc, sptseg, 2204*0Sstevel@tonic-gate lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx], 2205*0Sstevel@tonic-gate &ppa_szc, vpage, rw, 0, segvn_anypgsz, kcred); 2206*0Sstevel@tonic-gate anon_array_exit(&cookie); 2207*0Sstevel@tonic-gate 2208*0Sstevel@tonic-gate if (ierr != 0) { 2209*0Sstevel@tonic-gate if (ierr > 0) { 2210*0Sstevel@tonic-gate err = FC_MAKE_ERR(ierr); 2211*0Sstevel@tonic-gate goto lpgs_err; 2212*0Sstevel@tonic-gate } 2213*0Sstevel@tonic-gate break; 2214*0Sstevel@tonic-gate } 2215*0Sstevel@tonic-gate } 2216*0Sstevel@tonic-gate if (lp_addr == e_sptaddr) { 2217*0Sstevel@tonic-gate break; 2218*0Sstevel@tonic-gate } 2219*0Sstevel@tonic-gate ASSERT(lp_addr < e_sptaddr); 2220*0Sstevel@tonic-gate 2221*0Sstevel@tonic-gate /* 2222*0Sstevel@tonic-gate * ierr == -1 means we failed to allocate a large page. 2223*0Sstevel@tonic-gate * so do a size down operation. 
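		 *
		 * Worked example (illustrative only; assumes the sun4u
		 * page sizes 8K/64K/512K/4M for szc 0..3): if a 4 MB
		 * (szc 3) allocation fails with ierr == -1 and
		 * segvn_anypgsz is set, the loop retries the same range
		 * with
		 *
		 *	szc = 3 - 1 = 2;
		 *	pg_sz = page_get_pagesize(2) = 512K;
		 *	lp_npgs = btop(512K) = 64 pages per iteration;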
2224*0Sstevel@tonic-gate * 2225*0Sstevel@tonic-gate * ierr == -2 means some other process that privately shares 2226*0Sstevel@tonic-gate * pages with this process has allocated a larger page and we 2227*0Sstevel@tonic-gate * need to retry with larger pages. So do a size up 2228*0Sstevel@tonic-gate * operation. This relies on the fact that large pages are 2229*0Sstevel@tonic-gate * never partially shared i.e. if we share any constituent 2230*0Sstevel@tonic-gate * page of a large page with another process we must share the 2231*0Sstevel@tonic-gate * entire large page. Note this cannot happen for SOFTLOCK 2232*0Sstevel@tonic-gate * case, unless current address (lpaddr) is at the beginning 2233*0Sstevel@tonic-gate * of the next page size boundary because the other process 2234*0Sstevel@tonic-gate * couldn't have relocated locked pages. 2235*0Sstevel@tonic-gate */ 2236*0Sstevel@tonic-gate ASSERT(ierr == -1 || ierr == -2); 2237*0Sstevel@tonic-gate if (segvn_anypgsz) { 2238*0Sstevel@tonic-gate ASSERT(ierr == -2 || szc != 0); 2239*0Sstevel@tonic-gate ASSERT(ierr == -1 || szc < sptseg->s_szc); 2240*0Sstevel@tonic-gate szc = (ierr == -1) ? szc - 1 : szc + 1; 2241*0Sstevel@tonic-gate } else { 2242*0Sstevel@tonic-gate /* 2243*0Sstevel@tonic-gate * For faults and segvn_anypgsz == 0 2244*0Sstevel@tonic-gate * we need to be careful not to loop forever 2245*0Sstevel@tonic-gate * if existing page is found with szc other 2246*0Sstevel@tonic-gate * than 0 or seg->s_szc. This could be due 2247*0Sstevel@tonic-gate * to page relocations on behalf of DR or 2248*0Sstevel@tonic-gate * more likely large page creation. For this 2249*0Sstevel@tonic-gate * case simply re-size to existing page's szc 2250*0Sstevel@tonic-gate * if returned by anon_map_getpages(). 2251*0Sstevel@tonic-gate */ 2252*0Sstevel@tonic-gate if (ppa_szc == (uint_t)-1) { 2253*0Sstevel@tonic-gate szc = (ierr == -1) ? 
0 : sptseg->s_szc; 2254*0Sstevel@tonic-gate } else { 2255*0Sstevel@tonic-gate ASSERT(ppa_szc <= sptseg->s_szc); 2256*0Sstevel@tonic-gate ASSERT(ierr == -2 || ppa_szc < szc); 2257*0Sstevel@tonic-gate ASSERT(ierr == -1 || ppa_szc > szc); 2258*0Sstevel@tonic-gate szc = ppa_szc; 2259*0Sstevel@tonic-gate } 2260*0Sstevel@tonic-gate } 2261*0Sstevel@tonic-gate pg_sz = page_get_pagesize(szc); 2262*0Sstevel@tonic-gate lp_npgs = btop(pg_sz); 2263*0Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(lp_addr, pg_sz)); 2264*0Sstevel@tonic-gate } 2265*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 2266*0Sstevel@tonic-gate return (0); 2267*0Sstevel@tonic-gate 2268*0Sstevel@tonic-gate lpgs_err: 2269*0Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 2270*0Sstevel@tonic-gate for (j = 0; j < ppa_idx; j++) 2271*0Sstevel@tonic-gate page_unlock(ppa[j]); 2272*0Sstevel@tonic-gate return (err); 2273*0Sstevel@tonic-gate } 2274*0Sstevel@tonic-gate 2275*0Sstevel@tonic-gate int 2276*0Sstevel@tonic-gate spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages, 2277*0Sstevel@tonic-gate page_t **ppa, ulong_t *lockmap, size_t pos) 2278*0Sstevel@tonic-gate { 2279*0Sstevel@tonic-gate struct shm_data *shmd = seg->s_data; 2280*0Sstevel@tonic-gate struct spt_data *sptd = shmd->shm_sptseg->s_data; 2281*0Sstevel@tonic-gate ulong_t i; 2282*0Sstevel@tonic-gate int kernel; 2283*0Sstevel@tonic-gate 2284*0Sstevel@tonic-gate for (i = 0; i < npages; anon_index++, pos++, i++) { 2285*0Sstevel@tonic-gate if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) { 2286*0Sstevel@tonic-gate if (sptd->spt_ppa_lckcnt[anon_index] < 2287*0Sstevel@tonic-gate (ushort_t)DISM_LOCK_MAX) { 2288*0Sstevel@tonic-gate if (++sptd->spt_ppa_lckcnt[anon_index] == 2289*0Sstevel@tonic-gate (ushort_t)DISM_LOCK_MAX) { 2290*0Sstevel@tonic-gate cmn_err(CE_WARN, 2291*0Sstevel@tonic-gate "DISM page lock limit " 2292*0Sstevel@tonic-gate "reached on DISM offset 0x%lx\n", 2293*0Sstevel@tonic-gate anon_index << PAGESHIFT); 2294*0Sstevel@tonic-gate } 2295*0Sstevel@tonic-gate kernel = (sptd->spt_ppa && 2296*0Sstevel@tonic-gate sptd->spt_ppa[anon_index]) ? 
1 : 0; 2297*0Sstevel@tonic-gate if (!page_pp_lock(ppa[i], 0, kernel)) { 2298*0Sstevel@tonic-gate /* unlock rest of the pages */ 2299*0Sstevel@tonic-gate for (; i < npages; i++) 2300*0Sstevel@tonic-gate page_unlock(ppa[i]); 2301*0Sstevel@tonic-gate sptd->spt_ppa_lckcnt[anon_index]--; 2302*0Sstevel@tonic-gate return (EAGAIN); 2303*0Sstevel@tonic-gate } 2304*0Sstevel@tonic-gate shmd->shm_lckpgs++; 2305*0Sstevel@tonic-gate shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED; 2306*0Sstevel@tonic-gate if (lockmap != NULL) 2307*0Sstevel@tonic-gate BT_SET(lockmap, pos); 2308*0Sstevel@tonic-gate } 2309*0Sstevel@tonic-gate } 2310*0Sstevel@tonic-gate page_unlock(ppa[i]); 2311*0Sstevel@tonic-gate } 2312*0Sstevel@tonic-gate return (0); 2313*0Sstevel@tonic-gate } 2314*0Sstevel@tonic-gate 2315*0Sstevel@tonic-gate /*ARGSUSED*/ 2316*0Sstevel@tonic-gate static int 2317*0Sstevel@tonic-gate segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len, 2318*0Sstevel@tonic-gate int attr, int op, ulong_t *lockmap, size_t pos) 2319*0Sstevel@tonic-gate { 2320*0Sstevel@tonic-gate struct shm_data *shmd = seg->s_data; 2321*0Sstevel@tonic-gate struct seg *sptseg = shmd->shm_sptseg; 2322*0Sstevel@tonic-gate struct spt_data *sptd = sptseg->s_data; 2323*0Sstevel@tonic-gate pgcnt_t npages, a_npages; 2324*0Sstevel@tonic-gate page_t **ppa; 2325*0Sstevel@tonic-gate pgcnt_t an_idx, a_an_idx, ppa_idx; 2326*0Sstevel@tonic-gate caddr_t spt_addr, a_addr; /* spt and aligned address */ 2327*0Sstevel@tonic-gate size_t a_len; /* aligned len */ 2328*0Sstevel@tonic-gate size_t share_sz; 2329*0Sstevel@tonic-gate ulong_t i; 2330*0Sstevel@tonic-gate int sts = 0; 2331*0Sstevel@tonic-gate 2332*0Sstevel@tonic-gate ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock)); 2333*0Sstevel@tonic-gate 2334*0Sstevel@tonic-gate if ((sptd->spt_flags & SHM_PAGEABLE) == 0) { 2335*0Sstevel@tonic-gate return (0); 2336*0Sstevel@tonic-gate } 2337*0Sstevel@tonic-gate 2338*0Sstevel@tonic-gate addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2339*0Sstevel@tonic-gate an_idx = seg_page(seg, addr); 2340*0Sstevel@tonic-gate npages = btopr(len); 2341*0Sstevel@tonic-gate 2342*0Sstevel@tonic-gate if (an_idx + npages > btopr(shmd->shm_amp->size)) { 2343*0Sstevel@tonic-gate return (ENOMEM); 2344*0Sstevel@tonic-gate } 2345*0Sstevel@tonic-gate 2346*0Sstevel@tonic-gate if (op == MC_LOCK) { 2347*0Sstevel@tonic-gate /* 2348*0Sstevel@tonic-gate * Need to align addr and size request if they are not 2349*0Sstevel@tonic-gate * aligned so we can always allocate large page(s) however 2350*0Sstevel@tonic-gate * we only lock what was requested in initial request. 2351*0Sstevel@tonic-gate */ 2352*0Sstevel@tonic-gate share_sz = page_get_pagesize(sptseg->s_szc); 2353*0Sstevel@tonic-gate a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz); 2354*0Sstevel@tonic-gate a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)), 2355*0Sstevel@tonic-gate share_sz); 2356*0Sstevel@tonic-gate a_npages = btop(a_len); 2357*0Sstevel@tonic-gate a_an_idx = seg_page(seg, a_addr); 2358*0Sstevel@tonic-gate spt_addr = sptseg->s_base + ptob(a_an_idx); 2359*0Sstevel@tonic-gate ppa_idx = an_idx - a_an_idx; 2360*0Sstevel@tonic-gate 2361*0Sstevel@tonic-gate if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages), 2362*0Sstevel@tonic-gate KM_NOSLEEP)) == NULL) { 2363*0Sstevel@tonic-gate return (ENOMEM); 2364*0Sstevel@tonic-gate } 2365*0Sstevel@tonic-gate 2366*0Sstevel@tonic-gate /* 2367*0Sstevel@tonic-gate * Don't cache any new pages for IO and 2368*0Sstevel@tonic-gate * flush any cached pages. 
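		 *
		 * Illustrative user-level path that reaches this MC_LOCK
		 * case (editor's sketch; assumes the usual mlock(3C) ->
		 * memcntl(MC_LOCK) -> as_ctl() route and a pageable DISM
		 * attach):
		 *
		 *	id = shmget(key, sz, IPC_CREAT | 0600);
		 *	va = shmat(id, NULL, SHM_PAGEABLE);
		 *	mlock(va, chunk);
		 *
		 * Non-pageable ISM segments return early above and are
		 * never locked or unlocked through this path.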
/*ARGSUSED*/
static int
segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
    int attr, int op, ulong_t *lockmap, size_t pos)
{
	struct shm_data	*shmd = seg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	struct spt_data	*sptd = sptseg->s_data;
	pgcnt_t		npages, a_npages;
	page_t		**ppa;
	pgcnt_t		an_idx, a_an_idx, ppa_idx;
	caddr_t		spt_addr, a_addr;	/* spt and aligned address */
	size_t		a_len;			/* aligned len */
	size_t		share_sz;
	ulong_t		i;
	int		sts = 0;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		return (0);
	}

	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	an_idx = seg_page(seg, addr);
	npages = btopr(len);

	if (an_idx + npages > btopr(shmd->shm_amp->size)) {
		return (ENOMEM);
	}

	if (op == MC_LOCK) {
		/*
		 * Need to align addr and size request if they are not
		 * aligned so we can always allocate large page(s); however,
		 * we only lock what was requested in the initial request.
		 */
		share_sz = page_get_pagesize(sptseg->s_szc);
		a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
		a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)),
		    share_sz);
		a_npages = btop(a_len);
		a_an_idx = seg_page(seg, a_addr);
		spt_addr = sptseg->s_base + ptob(a_an_idx);
		ppa_idx = an_idx - a_an_idx;

		if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages),
		    KM_NOSLEEP)) == NULL) {
			return (ENOMEM);
		}

		/*
		 * Don't cache any new pages for IO and
		 * flush any cached pages.
		 */
		mutex_enter(&sptd->spt_lock);
		if (sptd->spt_ppa != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;

		sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa);
		if (sts != 0) {
			mutex_exit(&sptd->spt_lock);
			kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
			return (sts);
		}

		sts = spt_lockpages(seg, an_idx, npages,
		    &ppa[ppa_idx], lockmap, pos);
		/*
		 * unlock remaining pages for requests which are not
		 * aligned to, or not a multiple of, the underlying
		 * large page size
		 */
		for (i = 0; i < ppa_idx; i++)
			page_unlock(ppa[i]);
		for (i = ppa_idx + npages; i < a_npages; i++)
			page_unlock(ppa[i]);
		if (sptd->spt_ppa != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;
		mutex_exit(&sptd->spt_lock);

		kmem_free(ppa, ((sizeof (page_t *)) * a_npages));

	} else if (op == MC_UNLOCK) {	/* unlock */
		struct anon_map	*amp;
		struct anon	*ap;
		struct vnode	*vp;
		u_offset_t	off;
		struct page	*pp;
		int		kernel;
		anon_sync_obj_t	cookie;

		amp = sptd->spt_amp;
		mutex_enter(&sptd->spt_lock);
		if (shmd->shm_lckpgs == 0) {
			mutex_exit(&sptd->spt_lock);
			return (0);
		}
		/*
		 * Don't cache new IO pages.
		 */
		if (sptd->spt_ppa != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;

		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
		for (i = 0; i < npages; i++, an_idx++) {
			if (shmd->shm_vpage[an_idx] & DISM_PG_LOCKED) {
				anon_array_enter(amp, an_idx, &cookie);
				ap = anon_get_ptr(amp->ahp, an_idx);
				ASSERT(ap);
				ASSERT(sptd->spt_ppa_lckcnt[an_idx] > 0);

				swap_xlate(ap, &vp, &off);
				anon_array_exit(&cookie);
				pp = page_lookup(vp, off, SE_SHARED);
				ASSERT(pp);
				/*
				 * availrmem is decremented only for pages
				 * which are not in the seg pcache; for pages
				 * in the seg pcache, availrmem was
				 * decremented in _dismpagelock() (if
				 * they were not locked here)
				 */
				kernel = (sptd->spt_ppa &&
				    sptd->spt_ppa[an_idx]) ? 1 : 0;
				page_pp_unlock(pp, 0, kernel);
				page_unlock(pp);
				shmd->shm_vpage[an_idx] &= ~DISM_PG_LOCKED;
				sptd->spt_ppa_lckcnt[an_idx]--;
				shmd->shm_lckpgs--;
			}
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);
		if (sptd->spt_ppa != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;
		mutex_exit(&sptd->spt_lock);
	}
	return (sts);
}
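
/*
 * Report protections for a range of the segment.  ISM/DISM protections are
 * uniform across the whole segment, so every entry of the caller's protv[]
 * array is simply filled with spt_prot; a request covering three base
 * pages, for example, yields three identical entries.
 */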
/*ARGSUSED*/
int
segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
	spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * ISM segment is always rw.
	 */
	while (--pgno >= 0)
		*protv++ = sptd->spt_prot;
	return (0);
}

/*ARGSUSED*/
u_offset_t
segspt_shmgetoffset(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/* Offset does not matter in ISM memory */

	return ((u_offset_t)0);
}
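
/*
 * Mapping type of the segment.  Everything is MAP_SHARED; the MAP_NORESERVE
 * bit distinguishes the two flavors: swap is reserved only for pageable
 * DISM, so an ISM segment reports MAP_SHARED | MAP_NORESERVE while a DISM
 * (SHM_PAGEABLE) segment reports plain MAP_SHARED.
 */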
/* ARGSUSED */
int
segspt_shmgettype(struct seg *seg, caddr_t addr)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * The shared memory mapping is always MAP_SHARED, SWAP is only
	 * reserved for DISM
	 */
	return (MAP_SHARED |
	    ((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
}

/*ARGSUSED*/
int
segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	*vpp = sptd->spt_vp;
	return (0);
}
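
/*
 * madvise() handler for the shared segment.  MADV_FREE applies only to
 * DISM: the cached page array is invalidated, the DISM pcache entries are
 * purged and the backing anon pages are disclaimed.  When lgroup
 * optimizations are enabled, the MADV_ACCESS_* hints set a memory
 * allocation policy for the range and mark any existing pages for
 * migration.  For the policy path the range is first widened to the
 * underlying large page size and then clamped to the initialized part of
 * the segment; for instance (sizes assumed for illustration), with a 4 MB
 * underlying page size and spt_realsize of 6 MB, a hint covering the last
 * 4 MB chunk is trimmed to the 2 MB that actually exist.
 */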
/*ARGSUSED*/
static int
segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	struct shm_data	*shmd = (struct shm_data *)seg->s_data;
	struct spt_data	*sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
	struct anon_map	*amp;
	pgcnt_t pg_idx;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (behav == MADV_FREE) {
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
			return (0);

		amp = sptd->spt_amp;
		pg_idx = seg_page(seg, addr);

		mutex_enter(&sptd->spt_lock);
		if (sptd->spt_ppa != NULL)
			sptd->spt_flags |= DISM_PPA_CHANGED;
		mutex_exit(&sptd->spt_lock);

		/*
		 * Purge all DISM cached pages
		 */
		seg_ppurge_seg(segspt_reclaim);

		mutex_enter(&sptd->spt_lock);
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
		anon_disclaim(amp, pg_idx, len, ANON_PGLOOKUP_BLK);
		ANON_LOCK_EXIT(&amp->a_rwlock);
		mutex_exit(&sptd->spt_lock);
	} else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
	    behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
		int			already_set;
		ulong_t			anon_index;
		lgrp_mem_policy_t	policy;
		caddr_t			shm_addr;
		size_t			share_size;
		size_t			size;
		struct seg		*sptseg = shmd->shm_sptseg;
		caddr_t			sptseg_addr;

		/*
		 * Align address and length to page size of underlying segment
		 */
		share_size = page_get_pagesize(shmd->shm_sptseg->s_szc);
		shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size);
		size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)),
		    share_size);

		amp = shmd->shm_amp;
		anon_index = seg_page(seg, shm_addr);

		/*
		 * And now we may have to adjust size downward if we have
		 * exceeded the realsize of the segment or initial anon
		 * allocations.
		 */
		sptseg_addr = sptseg->s_base + ptob(anon_index);
		if ((sptseg_addr + size) >
		    (sptseg->s_base + sptd->spt_realsize))
			size = (sptseg->s_base + sptd->spt_realsize) -
			    sptseg_addr;

		/*
		 * Set memory allocation policy for this segment
		 */
		policy = lgrp_madv_to_policy(behav, len, MAP_SHARED);
		already_set = lgrp_shm_policy_set(policy, amp, anon_index,
		    NULL, 0, len);

		/*
		 * If random memory allocation policy set already,
		 * don't bother reapplying it.
		 */
		if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy))
			return (0);

		/*
		 * Mark any existing pages in the given range for
		 * migration, flushing the I/O page cache, and using
		 * underlying segment to calculate anon index and get
		 * anonmap and vnode pointer from
		 */
		if (shmd->shm_softlockcnt > 0)
			segspt_purge(seg);

		page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0);
	}

	return (0);
}

/*ARGSUSED*/
void
segspt_shmdump(struct seg *seg)
{
	/* no-op for ISM segment */
}

/*ARGSUSED*/
static faultcode_t
segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
{
	return (ENOTSUP);
}

/*
 * get a memory ID for an addr in a given segment
 */
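/*
 * The memid returned below identifies the backing anon slot together with
 * the byte offset within its page: val[0] holds the struct anon pointer
 * and val[1] holds (addr & PAGEOFFSET).  If the slot has never been
 * populated, a zero-filled anon page is allocated first so that the id
 * remains meaningful.
 */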
static int
segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
	struct shm_data	*shmd = (struct shm_data *)seg->s_data;
	struct anon	*ap;
	size_t		anon_index;
	struct anon_map	*amp = shmd->shm_amp;
	struct spt_data	*sptd = shmd->shm_sptseg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	anon_sync_obj_t	cookie;

	anon_index = seg_page(seg, addr);

	if (addr > (seg->s_base + sptd->spt_realsize)) {
		return (EFAULT);
	}

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	anon_array_enter(amp, anon_index, &cookie);
	ap = anon_get_ptr(amp->ahp, anon_index);
	if (ap == NULL) {
		struct page *pp;
		caddr_t spt_addr = sptseg->s_base + ptob(anon_index);

		pp = anon_zero(sptseg, spt_addr, &ap, kcred);
		if (pp == NULL) {
			anon_array_exit(&cookie);
			ANON_LOCK_EXIT(&amp->a_rwlock);
			return (ENOMEM);
		}
		(void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP);
		page_unlock(pp);
	}
	anon_array_exit(&cookie);
	ANON_LOCK_EXIT(&amp->a_rwlock);
	memidp->val[0] = (uintptr_t)ap;
	memidp->val[1] = (uintptr_t)addr & PAGEOFFSET;
	return (0);
}

/*
 * Get memory allocation policy info for specified address in given segment
 */
static lgrp_mem_policy_info_t *
segspt_shmgetpolicy(struct seg *seg, caddr_t addr)
{
	struct anon_map		*amp;
	ulong_t			anon_index;
	lgrp_mem_policy_info_t	*policy_info;
	struct shm_data		*shm_data;

	ASSERT(seg != NULL);

	/*
	 * Get anon_map from segshm
	 *
	 * Assume that no lock needs to be held on anon_map, since
	 * it should be protected by its reference count which must be
	 * nonzero for an existing segment.
	 * Need to grab readers lock on policy tree though.
	 */
	shm_data = (struct shm_data *)seg->s_data;
	if (shm_data == NULL)
		return (NULL);
	amp = shm_data->shm_amp;
	ASSERT(amp->refcnt != 0);

	/*
	 * Get policy info
	 *
	 * Assume starting anon index of 0
	 */
	anon_index = seg_page(seg, addr);
	policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);

	return (policy_info);
}