/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

/*
 * VM - anonymous pages.
 *
 * This layer sits immediately above the vm_swap layer.  It manages
 * physical pages that have no permanent identity in the file system
 * name space, using the services of the vm_swap layer to allocate
 * backing storage for these pages.  Since these pages have no external
 * identity, they are discarded when the last reference is removed.
 *
 * An important function of this layer is to manage low-level sharing
 * of pages that are logically distinct but that happen to be
 * physically identical (e.g., the corresponding pages of the processes
 * resulting from a fork before one process or the other changes their
 * contents).  This pseudo-sharing is present only as an optimization
 * and is not to be confused with true sharing in which multiple
 * address spaces deliberately contain references to the same object;
 * such sharing is managed at a higher level.
 *
 * The key data structure here is the anon struct, which contains a
 * reference count for its associated physical page and a hint about
 * the identity of that page.  Anon structs typically live in arrays,
 * with an instance's position in its array determining where the
 * corresponding backing storage is allocated; however, the swap_xlate()
 * routine abstracts away this representation information so that the
 * rest of the anon layer need not know it.  (See the swap layer for
 * more details on anon struct layout.)
 *
 * In future versions of the system, the association between an
 * anon struct and its position on backing store will change so that
 * we don't require backing store for all anonymous pages in the system.
 * This is an important consideration for large memory systems.
 * We can also use this technique to delay binding physical locations
 * to anonymous pages until pageout/swapout time, when we can make
 * smarter allocation decisions to improve anonymous klustering.
 *
 * Many of the routines defined here take a (struct anon **) argument,
 * which allows the code at this level to manage anon pages directly,
 * so that callers can regard anon structs as opaque objects and not be
 * concerned with assigning or inspecting their contents.
 *
 * Clients of this layer refer to anon pages indirectly.  That is, they
 * maintain arrays of pointers to anon structs rather than maintaining
 * anon structs themselves.  The (struct anon **) arguments mentioned
 * above are pointers to entries in these arrays.  It is these arrays
 * that capture the mapping between offsets within a given segment and
 * the corresponding anonymous backing storage address.
 */

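/*
 * Illustrative sketch (not part of the build): a typical client
 * resolves the anonymous page behind a segment offset through this
 * indirection, assuming an anon_map 'amp' and a segment-relative
 * anon index 'an_idx':
 *
 *	struct anon *ap;
 *	struct vnode *vp;
 *	anoff_t off;
 *
 *	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
 *	ap = anon_get_ptr(amp->ahp, an_idx);
 *	if (ap != NULL)
 *		swap_xlate(ap, &vp, &off);   (vp, off) names the backing
 *	ANON_LOCK_EXIT(&amp->a_rwlock);
 */
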
#ifdef DEBUG
#define	ANON_DEBUG
#endif

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/cred.h>
#include <sys/thread.h>
#include <sys/vnode.h>
#include <sys/cpuvar.h>
#include <sys/swap.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/bitmap.h>
#include <sys/vmsystm.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/fs/swapnode.h>
#include <sys/tnf_probe.h>
#include <sys/lgrp.h>
#include <sys/policy.h>
#include <sys/condvar_impl.h>
#include <sys/mutex_impl.h>
#include <sys/rctl.h>

#include <vm/as.h>
#include <vm/hat.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/vpage.h>
#include <vm/seg.h>
#include <vm/rm.h>

#include <fs/fs_subr.h>

struct vnode *anon_vp;

int anon_debug;

kmutex_t	anoninfo_lock;
struct		k_anoninfo k_anoninfo;
ani_free_t	*ani_free_pool;
pad_mutex_t	anon_array_lock[ANON_LOCKSIZE];
kcondvar_t	anon_array_cv[ANON_LOCKSIZE];

/*
 * Global hash table for (vp, off) -> anon slot
 */
extern	int swap_maxcontig;
size_t	anon_hash_size;
unsigned int anon_hash_shift;
struct anon **anon_hash;

static struct kmem_cache *anon_cache;
static struct kmem_cache *anonmap_cache;

pad_mutex_t	*anonhash_lock;

/*
 * Used to make the increment of all refcnts of all anon slots of a large
 * page appear to be atomic.  The lock is grabbed for the first anon slot of
 * a large page.
 */
pad_mutex_t	*anonpages_hash_lock;

#define	APH_MUTEX(vp, off)				\
	(&anonpages_hash_lock[(ANON_HASH((vp), (off)) &	\
	    (AH_LOCK_SIZE - 1))].pad_mutex)

#ifdef VM_STATS
static struct anonvmstats_str {
	ulong_t getpages[30];
	ulong_t privatepages[10];
	ulong_t demotepages[9];
	ulong_t decrefpages[9];
	ulong_t	dupfillholes[4];
	ulong_t freepages[1];
} anonvmstats;
#endif /* VM_STATS */

/*ARGSUSED*/
static int
anonmap_cache_constructor(void *buf, void *cdrarg, int kmflags)
{
	struct anon_map *amp = buf;

	rw_init(&amp->a_rwlock, NULL, RW_DEFAULT, NULL);
	cv_init(&amp->a_purgecv, NULL, CV_DEFAULT, NULL);
	mutex_init(&amp->a_pmtx, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&amp->a_purgemtx, NULL, MUTEX_DEFAULT, NULL);
	return (0);
}

/*ARGSUSED1*/
static void
anonmap_cache_destructor(void *buf, void *cdrarg)
{
	struct anon_map *amp = buf;

	rw_destroy(&amp->a_rwlock);
	cv_destroy(&amp->a_purgecv);
	mutex_destroy(&amp->a_pmtx);
	mutex_destroy(&amp->a_purgemtx);
}

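/*
 * Sketch of the intended large-page locking pattern (illustrative;
 * it mirrors the uses in anon_decref_pages() below): refcnt updates
 * for all constituent pages of a large page are serialized by taking
 * the APH_MUTEX of the root (first) anon slot only:
 *
 *	kmutex_t *ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
 *	mutex_enter(ahmpages);
 *	...adjust an_refcnt of every anon slot in the large page...
 *	mutex_exit(ahmpages);
 */
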
void
anon_init(void)
{
	int i;
	pad_mutex_t *tmp;

	/* These both need to be powers of 2 so round up to the next power */
	anon_hash_shift = highbit((physmem / ANON_HASHAVELEN) - 1);
	anon_hash_size = 1L << anon_hash_shift;

	/*
	 * We need to align the anonhash_lock and anonpages_hash_lock arrays
	 * to a 64B boundary to avoid false sharing.  We add 63B to our
	 * allocation so that we can get a 64B aligned address to use.
	 * We allocate both of these together to avoid wasting an additional
	 * 63B.
	 */
	tmp = kmem_zalloc((2 * AH_LOCK_SIZE * sizeof (pad_mutex_t)) + 63,
	    KM_SLEEP);
	anonhash_lock = (pad_mutex_t *)P2ROUNDUP((uintptr_t)tmp, 64);
	anonpages_hash_lock = anonhash_lock + AH_LOCK_SIZE;

	for (i = 0; i < AH_LOCK_SIZE; i++) {
		mutex_init(&anonhash_lock[i].pad_mutex, NULL, MUTEX_DEFAULT,
		    NULL);
		mutex_init(&anonpages_hash_lock[i].pad_mutex, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	for (i = 0; i < ANON_LOCKSIZE; i++) {
		mutex_init(&anon_array_lock[i].pad_mutex, NULL,
		    MUTEX_DEFAULT, NULL);
		cv_init(&anon_array_cv[i], NULL, CV_DEFAULT, NULL);
	}

	anon_hash = (struct anon **)
	    kmem_zalloc(sizeof (struct anon *) * anon_hash_size, KM_SLEEP);
	anon_cache = kmem_cache_create("anon_cache", sizeof (struct anon),
	    AN_CACHE_ALIGN, NULL, NULL, NULL, NULL, NULL, KMC_PREFILL);
	anonmap_cache = kmem_cache_create("anonmap_cache",
	    sizeof (struct anon_map), 0,
	    anonmap_cache_constructor, anonmap_cache_destructor, NULL,
	    NULL, NULL, 0);
	swap_maxcontig = (1024 * 1024) >> PAGESHIFT;	/* 1MB of pages */

	tmp = kmem_zalloc((ANI_MAX_POOL * sizeof (ani_free_t)) + 63, KM_SLEEP);
	/* Round ani_free_pool to cacheline boundary to avoid false sharing. */
	ani_free_pool = (ani_free_t *)P2ROUNDUP((uintptr_t)tmp, 64);

	anon_vp = vn_alloc(KM_SLEEP);
	vn_setops(anon_vp, swap_vnodeops);
	anon_vp->v_type = VREG;
	anon_vp->v_flag |= (VISSWAP|VISSWAPFS);
}

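/*
 * Worked example of the alignment fix-up above (illustrative): if
 * kmem_zalloc() returned 0x30000428, then
 * P2ROUNDUP(0x30000428, 64) == 0x30000440, a 64-byte aligned address
 * that still lies within the 63 extra bytes of the allocation.
 */
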
/*
 * Global anon slot hash table manipulation.
 */

static void
anon_addhash(struct anon *ap)
{
	int index;

	ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));
	index = ANON_HASH(ap->an_vp, ap->an_off);
	ap->an_hash = anon_hash[index];
	anon_hash[index] = ap;
}

static void
anon_rmhash(struct anon *ap)
{
	struct anon **app;

	ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));

	for (app = &anon_hash[ANON_HASH(ap->an_vp, ap->an_off)];
	    *app; app = &((*app)->an_hash)) {
		if (*app == ap) {
			*app = ap->an_hash;
			break;
		}
	}
}

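/*
 * Illustrative chain layout (not part of the build): anon_hash[]
 * holds the heads of singly linked chains threaded through
 * ap->an_hash, so a lookup for (vp, off) walks:
 *
 *	for (ap = anon_hash[ANON_HASH(vp, off)]; ap != NULL;
 *	    ap = ap->an_hash)
 *		if (ap->an_vp == vp && ap->an_off == off)
 *			break;
 */
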
/*
 * The anon array interfaces.  Functions for allocating and freeing
 * arrays of pointers, and for returning/setting entries in an array
 * of pointers for a given offset.
 *
 * Create the list of pointers
 */
struct anon_hdr *
anon_create(pgcnt_t npages, int flags)
{
	struct anon_hdr *ahp;
	ulong_t nchunks;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;

	if ((ahp = kmem_zalloc(sizeof (struct anon_hdr), kmemflags)) == NULL) {
		return (NULL);
	}

	mutex_init(&ahp->serial_lock, NULL, MUTEX_DEFAULT, NULL);
	/*
	 * Single level case.
	 */
	ahp->size = npages;
	if (npages <= ANON_CHUNK_SIZE || (flags & ANON_ALLOC_FORCE)) {

		if (flags & ANON_ALLOC_FORCE)
			ahp->flags |= ANON_ALLOC_FORCE;

		ahp->array_chunk = kmem_zalloc(
		    ahp->size * sizeof (struct anon *), kmemflags);

		if (ahp->array_chunk == NULL) {
			kmem_free(ahp, sizeof (struct anon_hdr));
			return (NULL);
		}
	} else {
		/*
		 * 2 Level case.
		 * anon hdr size needs to be rounded up to be a multiple
		 * of ANON_CHUNK_SIZE.  This is important as various anon
		 * related functions depend on this.
		 * NOTE -
		 * anon_grow() makes anon hdr size a multiple of
		 * ANON_CHUNK_SIZE.
		 * amp size is <= anon hdr size.
		 * anon_index + seg_pgs <= anon hdr size.
		 */
		ahp->size = P2ROUNDUP(npages, ANON_CHUNK_SIZE);
		nchunks = ahp->size >> ANON_CHUNK_SHIFT;

		ahp->array_chunk = kmem_zalloc(nchunks * sizeof (ulong_t *),
		    kmemflags);

		if (ahp->array_chunk == NULL) {
			kmem_free(ahp, sizeof (struct anon_hdr));
			return (NULL);
		}
	}
	return (ahp);
}

/*
 * Free the array of pointers
 */
void
anon_release(struct anon_hdr *ahp, pgcnt_t npages)
{
	ulong_t i;
	void **ppp;
	ulong_t nchunks;

	ASSERT(npages <= ahp->size);

	/*
	 * Single level case.
	 */
	if (npages <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
		kmem_free(ahp->array_chunk, ahp->size * sizeof (struct anon *));
	} else {
		/*
		 * 2 level case.
		 */
		nchunks = ahp->size >> ANON_CHUNK_SHIFT;
		for (i = 0; i < nchunks; i++) {
			ppp = &ahp->array_chunk[i];
			if (*ppp != NULL)
				kmem_free(*ppp, PAGESIZE);
		}
		kmem_free(ahp->array_chunk, nchunks * sizeof (ulong_t *));
	}
	mutex_destroy(&ahp->serial_lock);
	kmem_free(ahp, sizeof (struct anon_hdr));
}

/*
 * Return the pointer from the list for a
 * specified anon index.
 */
struct anon *
anon_get_ptr(struct anon_hdr *ahp, ulong_t an_idx)
{
	struct anon **app;

	ASSERT(an_idx < ahp->size);

	/*
	 * Single level case.
	 */
	if ((ahp->size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
		return ((struct anon *)
		    ((uintptr_t)ahp->array_chunk[an_idx] & ANON_PTRMASK));
	} else {

		/*
		 * 2 level case.
		 */
		app = ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
		if (app) {
			return ((struct anon *)
			    ((uintptr_t)app[an_idx & ANON_CHUNK_OFF] &
			    ANON_PTRMASK));
		} else {
			return (NULL);
		}
	}
}

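/*
 * Index decomposition example (illustrative): in the 2 level case
 * an_idx >> ANON_CHUNK_SHIFT picks the level-2 chunk and
 * an_idx & ANON_CHUNK_OFF picks the slot within it.  Assuming
 * ANON_CHUNK_SIZE == 512 (e.g. 4K pages, 8-byte pointers), an_idx
 * 1000 resolves to chunk 1, slot 488.
 */
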
/*
 * Return the anon pointer for the first valid entry in the anon list,
 * starting from the given index.
 */
struct anon *
anon_get_next_ptr(struct anon_hdr *ahp, ulong_t *index)
{
	struct anon *ap;
	struct anon **app;
	ulong_t chunkoff;
	ulong_t i;
	ulong_t j;
	pgcnt_t size;

	i = *index;
	size = ahp->size;

	ASSERT(i < size);

	if ((size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
		/*
		 * 1 level case
		 */
		while (i < size) {
			ap = (struct anon *)
			    ((uintptr_t)ahp->array_chunk[i] & ANON_PTRMASK);
			if (ap) {
				*index = i;
				return (ap);
			}
			i++;
		}
	} else {
		/*
		 * 2 level case
		 */
		chunkoff = i & ANON_CHUNK_OFF;
		while (i < size) {
			app = ahp->array_chunk[i >> ANON_CHUNK_SHIFT];
			if (app)
				for (j = chunkoff; j < ANON_CHUNK_SIZE; j++) {
					ap = (struct anon *)
					    ((uintptr_t)app[j] & ANON_PTRMASK);
					if (ap) {
						*index = i + (j - chunkoff);
						return (ap);
					}
				}
			chunkoff = 0;
			i = (i + ANON_CHUNK_SIZE) & ~ANON_CHUNK_OFF;
		}
	}
	*index = size;
	return (NULL);
}

/*
 * Set list entry with a given pointer for a specified offset
 */
int
anon_set_ptr(struct anon_hdr *ahp, ulong_t an_idx, struct anon *ap, int flags)
{
	void **ppp;
	struct anon **app;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	uintptr_t *ap_addr;

	ASSERT(an_idx < ahp->size);

	/*
	 * Single level case.
	 */
	if (ahp->size <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
		ap_addr = (uintptr_t *)&ahp->array_chunk[an_idx];
	} else {

		/*
		 * 2 level case.
		 */
		ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];

		ASSERT(ppp != NULL);
		if (*ppp == NULL) {
			mutex_enter(&ahp->serial_lock);
			ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
			if (*ppp == NULL) {
				*ppp = kmem_zalloc(PAGESIZE, kmemflags);
				if (*ppp == NULL) {
					mutex_exit(&ahp->serial_lock);
					return (ENOMEM);
				}
			}
			mutex_exit(&ahp->serial_lock);
		}
		app = *ppp;
		ap_addr = (uintptr_t *)&app[an_idx & ANON_CHUNK_OFF];
	}
	*ap_addr = (*ap_addr & ~ANON_PTRMASK) | (uintptr_t)ap;
	return (0);
}

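/*
 * Note on anon_set_ptr() above: the level-2 chunk is allocated with a
 * double-checked pattern -- the unlocked *ppp == NULL test is repeated
 * under serial_lock so that concurrent setters allocate the PAGESIZE
 * chunk exactly once.
 */
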
/*
 * Copy anon array into a given new anon array
 */
int
anon_copy_ptr(struct anon_hdr *sahp, ulong_t s_idx,
	struct anon_hdr *dahp, ulong_t d_idx,
	pgcnt_t npages, int flags)
{
	void **sapp, **dapp;
	void *ap;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;

	ASSERT((s_idx < sahp->size) && (d_idx < dahp->size));
	ASSERT((npages <= sahp->size) && (npages <= dahp->size));

	/*
	 * Both arrays are 1 level.
	 */
	if (((sahp->size <= ANON_CHUNK_SIZE) &&
	    (dahp->size <= ANON_CHUNK_SIZE)) ||
	    ((sahp->flags & ANON_ALLOC_FORCE) &&
	    (dahp->flags & ANON_ALLOC_FORCE))) {

		bcopy(&sahp->array_chunk[s_idx], &dahp->array_chunk[d_idx],
		    npages * sizeof (struct anon *));
		return (0);
	}

	/*
	 * Both arrays are 2 levels.
	 */
	if (sahp->size > ANON_CHUNK_SIZE &&
	    dahp->size > ANON_CHUNK_SIZE &&
	    ((sahp->flags & ANON_ALLOC_FORCE) == 0) &&
	    ((dahp->flags & ANON_ALLOC_FORCE) == 0)) {

		ulong_t sapidx, dapidx;
		ulong_t *sap, *dap;
		ulong_t chknp;

		while (npages != 0) {

			sapidx = s_idx & ANON_CHUNK_OFF;
			dapidx = d_idx & ANON_CHUNK_OFF;
			chknp = ANON_CHUNK_SIZE - MAX(sapidx, dapidx);
			if (chknp > npages)
				chknp = npages;

			sapp = &sahp->array_chunk[s_idx >> ANON_CHUNK_SHIFT];
			if ((sap = *sapp) != NULL) {
				dapp = &dahp->array_chunk[d_idx
				    >> ANON_CHUNK_SHIFT];
				if ((dap = *dapp) == NULL) {
					*dapp = kmem_zalloc(PAGESIZE,
					    kmemflags);
					if ((dap = *dapp) == NULL)
						return (ENOMEM);
				}
				bcopy((sap + sapidx), (dap + dapidx),
				    chknp << ANON_PTRSHIFT);
			}
			s_idx += chknp;
			d_idx += chknp;
			npages -= chknp;
		}
		return (0);
	}

	/*
	 * At least one of the arrays is 2 level.
	 */
	while (npages--) {
		if ((ap = anon_get_ptr(sahp, s_idx)) != NULL) {
			ASSERT(!ANON_ISBUSY(anon_get_slot(sahp, s_idx)));
			if (anon_set_ptr(dahp, d_idx, ap, flags) == ENOMEM)
				return (ENOMEM);
		}
		s_idx++;
		d_idx++;
	}
	return (0);
}


/*
 * ANON_INITBUF is a convenience macro for anon_grow() below.  It
 * takes a buffer dst, which is at least as large as buffer src.  It
 * does a bcopy from src into dst, and then bzeros the extra bytes
 * of dst.  If tail is set, the data in src is tail aligned within
 * dst instead of head aligned.
 */

#define	ANON_INITBUF(src, srclen, dst, dstsize, tail)			      \
	if (tail) {							      \
		bzero((dst), (dstsize) - (srclen));			      \
		bcopy((src), (char *)(dst) + (dstsize) - (srclen), (srclen)); \
	} else {							      \
		bcopy((src), (dst), (srclen));				      \
		bzero((char *)(dst) + (srclen), (dstsize) - (srclen));	      \
	}

#define	ANON_1_LEVEL_INC	(ANON_CHUNK_SIZE / 8)
#define	ANON_2_LEVEL_INC	(ANON_1_LEVEL_INC * ANON_CHUNK_SIZE)

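/*
 * ANON_INITBUF example (illustrative): with srclen == 8 and
 * dstsize == 16, the tail form zeroes dst[0..7] and copies src into
 * dst[8..15]; the head form copies src into dst[0..7] and zeroes
 * dst[8..15].  anon_grow() uses the tail form when growing down.
 */
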
/*
 * anon_grow() is used to efficiently extend an existing anon array.
 * startidx_p points to the index into the anon array of the first page
 * that is in use.  oldseg_pgs is the number of pages in use, starting at
 * *startidx_p.  newseg_pgs is the number of additional pages desired.
 *
 * If startidx_p == NULL, startidx is taken to be 0 and cannot be changed.
 *
 * The growth is done by creating a new top level of the anon array,
 * and (if the array is 2-level) reusing the existing second level arrays.
 *
 * flags can be used to specify ANON_NOSLEEP and ANON_GROWDOWN.
 *
 * Returns the new number of pages in the anon array.
 */
pgcnt_t
anon_grow(struct anon_hdr *ahp, ulong_t *startidx_p, pgcnt_t oldseg_pgs,
    pgcnt_t newseg_pgs, int flags)
{
	ulong_t startidx = startidx_p ? *startidx_p : 0;
	pgcnt_t oldamp_pgs = ahp->size, newamp_pgs;
	pgcnt_t oelems, nelems, totpages;
	void **level1;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	int growdown = (flags & ANON_GROWDOWN);
	size_t newarrsz, oldarrsz;
	void *level2;

	ASSERT(!(startidx_p == NULL && growdown));
	ASSERT(startidx + oldseg_pgs <= ahp->size);

	/*
	 * Determine the total number of pages needed in the new
	 * anon array.  If growing down, totpages is all pages from
	 * startidx through the end of the array, plus <newseg_pgs>
	 * pages.  If growing up, keep all pages from page 0 through
	 * the last page currently in use, plus <newseg_pgs> pages.
	 */
	if (growdown)
		totpages = oldamp_pgs - startidx + newseg_pgs;
	else
		totpages = startidx + oldseg_pgs + newseg_pgs;

	/* If the array is already large enough, just return. */

	if (oldamp_pgs >= totpages) {
		if (growdown)
			*startidx_p = oldamp_pgs - totpages;
		return (oldamp_pgs);
	}

	/*
	 * oldamp_pgs/newamp_pgs are the total numbers of pages represented
	 * by the corresponding arrays.
	 * oelems/nelems are the number of pointers in the top level arrays
	 * which may be either level 1 or level 2.
	 * Will the new anon array be one level or two levels?
	 */
	if (totpages <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
		newamp_pgs = P2ROUNDUP(totpages, ANON_1_LEVEL_INC);
		oelems = oldamp_pgs;
		nelems = newamp_pgs;
	} else {
		newamp_pgs = P2ROUNDUP(totpages, ANON_2_LEVEL_INC);
		oelems = (oldamp_pgs + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
		nelems = newamp_pgs >> ANON_CHUNK_SHIFT;
	}

	newarrsz = nelems * sizeof (void *);
	level1 = kmem_alloc(newarrsz, kmemflags);
	if (level1 == NULL)
		return (0);

	/* Are we converting from a one level to a two level anon array? */

	if (newamp_pgs > ANON_CHUNK_SIZE && oldamp_pgs <= ANON_CHUNK_SIZE &&
	    !(ahp->flags & ANON_ALLOC_FORCE)) {

		/*
		 * Yes, we're converting to a two level.  Reuse old level 1
		 * as new level 2 if it is exactly PAGESIZE.  Otherwise
		 * alloc a new level 2 and copy the old level 1 data into it.
		 */
		if (oldamp_pgs == ANON_CHUNK_SIZE) {
			level2 = (void *)ahp->array_chunk;
		} else {
			level2 = kmem_alloc(PAGESIZE, kmemflags);
			if (level2 == NULL) {
				kmem_free(level1, newarrsz);
				return (0);
			}
			oldarrsz = oldamp_pgs * sizeof (void *);

			ANON_INITBUF(ahp->array_chunk, oldarrsz,
			    level2, PAGESIZE, growdown);
			kmem_free(ahp->array_chunk, oldarrsz);
		}
		bzero(level1, newarrsz);
		if (growdown)
			level1[nelems - 1] = level2;
		else
			level1[0] = level2;
	} else {
		oldarrsz = oelems * sizeof (void *);

		ANON_INITBUF(ahp->array_chunk, oldarrsz,
		    level1, newarrsz, growdown);
		kmem_free(ahp->array_chunk, oldarrsz);
	}

	ahp->array_chunk = level1;
	ahp->size = newamp_pgs;
	if (growdown)
		*startidx_p = newamp_pgs - totpages;

	return (newamp_pgs);
}

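/*
 * anon_grow() example (illustrative): growing a one-level array of
 * exactly ANON_CHUNK_SIZE slots by another ANON_CHUNK_SIZE pages
 * converts it to two levels; since the old level-1 array is then
 * exactly PAGESIZE bytes, it is reused directly as the first level-2
 * chunk rather than being copied.
 */
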
/*
 * Called to sync ani_free value.
 */
void
set_anoninfo(void)
{
	processorid_t	ix, max_seqid;
	pgcnt_t		total = 0;
	static clock_t	last_time;
	clock_t		new_time;

	if (ani_free_pool == NULL)
		return;

	/*
	 * Recompute ani_free at most once per tick.  Use max_cpu_seqid_ever
	 * to identify the maximum number of CPUs that were ever online.
	 */
	new_time = ddi_get_lbolt();
	if (new_time > last_time) {

		max_seqid = max_cpu_seqid_ever;
		ASSERT(ANI_MAX_POOL > max_seqid);
		for (ix = 0; ix <= max_seqid; ix++)
			total += ani_free_pool[ix].ani_count;

		last_time = new_time;
		k_anoninfo.ani_free = total;
	}
}

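/*
 * Sketch of the accounting scheme (assumed from the ANI_ADD() uses
 * below): free-slot counts are striped across ani_free_pool[] per CPU
 * to avoid a global hot spot; set_anoninfo() folds the per-CPU counts
 * back into k_anoninfo.ani_free at most once per tick.
 */
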
/*
 * Reserve anon space.
 *
 * It's no longer simply a matter of incrementing ani_resv to
 * reserve swap space; we need to check memory-based as well
 * as disk-backed (physical) swap.  The following algorithm
 * is used:
 *	Check the space on physical swap
 *		i.e. amount needed < ani_max - ani_phys_resv
 *	If we are swapping on swapfs check
 *		amount needed < (availrmem - swapfs_minfree)
 * Since the algorithm to check for the quantity of swap space is
 * almost the same as that for reserving it, we'll just use anon_resvmem
 * with a flag to decrement availrmem.
 *
 * Return non-zero on success.
 */
int
anon_resvmem(size_t size, boolean_t takemem, zone_t *zone, int tryhard)
{
	pgcnt_t npages = btopr(size);
	pgcnt_t mswap_pages = 0;
	pgcnt_t pswap_pages = 0;
	proc_t *p = curproc;

	if (zone != NULL && takemem) {
		/* test zone.max-swap resource control */
		mutex_enter(&p->p_lock);
		if (rctl_incr_swap(p, zone, ptob(npages)) != 0) {
			mutex_exit(&p->p_lock);
			return (0);
		}
		mutex_exit(&p->p_lock);
	}
	mutex_enter(&anoninfo_lock);

	/*
	 * pswap_pages is the number of pages we can take from
	 * physical (i.e. disk-backed) swap.
	 */
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
	pswap_pages = k_anoninfo.ani_max - k_anoninfo.ani_phys_resv;

	ANON_PRINT(A_RESV,
	    ("anon_resvmem: npages %lu takemem %u pswap %lu caller %p\n",
	    npages, takemem, pswap_pages, (void *)caller()));

	if (npages <= pswap_pages) {
		/*
		 * we have enough space on a physical swap
		 */
		if (takemem)
			k_anoninfo.ani_phys_resv += npages;
		mutex_exit(&anoninfo_lock);
		return (1);
	} else if (pswap_pages != 0) {
		/*
		 * we have some space on a physical swap
		 */
		if (takemem) {
			/*
			 * use up remainder of phys swap
			 */
			k_anoninfo.ani_phys_resv += pswap_pages;
			ASSERT(k_anoninfo.ani_phys_resv == k_anoninfo.ani_max);
		}
	}
	/*
	 * since (npages > pswap_pages) we need mem swap
	 * mswap_pages is the number of pages needed from availrmem
	 */
	ASSERT(npages > pswap_pages);
	mswap_pages = npages - pswap_pages;

	ANON_PRINT(A_RESV, ("anon_resvmem: need %ld pages from memory\n",
	    mswap_pages));

	/*
	 * priv processes can reserve memory as swap as long as availrmem
	 * remains greater than swapfs_minfree; in the case of non-priv
	 * processes, memory can be reserved as swap only if availrmem
	 * doesn't fall below (swapfs_minfree + swapfs_reserve).  Thus,
	 * swapfs_reserve amount of memswap is not available to non-priv
	 * processes.  This protects daemons such as automounter from
	 * dying as a result of application processes eating away almost
	 * all membased swap.  This safeguard becomes useless if apps are
	 * run with root access.
	 *
	 * swapfs_reserve is the smaller of 4MB and 1/16 of physmem.
	 *
	 */
	if (tryhard) {
		pgcnt_t floor_pages;

		if (secpolicy_resource_anon_mem(CRED())) {
			floor_pages = swapfs_minfree;
		} else {
			floor_pages = swapfs_minfree + swapfs_reserve;
		}

		mutex_exit(&anoninfo_lock);
		(void) page_reclaim_mem(mswap_pages, floor_pages, 0);
		mutex_enter(&anoninfo_lock);
	}

	mutex_enter(&freemem_lock);
	if (availrmem > (swapfs_minfree + swapfs_reserve + mswap_pages) ||
	    (availrmem > (swapfs_minfree + mswap_pages) &&
	    secpolicy_resource(CRED()) == 0)) {

		if (takemem) {
			/*
			 * Take the memory from the rest of the system.
			 */
			availrmem -= mswap_pages;
			mutex_exit(&freemem_lock);
			k_anoninfo.ani_mem_resv += mswap_pages;
			ANI_ADD(mswap_pages);
			ANON_PRINT((A_RESV | A_MRESV),
			    ("anon_resvmem: took %ld pages of availrmem\n",
			    mswap_pages));
		} else {
			mutex_exit(&freemem_lock);
		}

		ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
		mutex_exit(&anoninfo_lock);
		return (1);
	} else {
		/*
		 * Fail if not enough memory
		 */
		if (takemem) {
			k_anoninfo.ani_phys_resv -= pswap_pages;
		}

		mutex_exit(&freemem_lock);
		mutex_exit(&anoninfo_lock);
		ANON_PRINT(A_RESV,
		    ("anon_resvmem: not enough space from swapfs\n"));
		if (zone != NULL && takemem)
			rctl_decr_swap(zone, ptob(npages));
		return (0);
	}
}

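/*
 * Typical usage (illustrative caller pattern): a client reserves swap
 * before allocating anon slots and gives the same amount back when
 * done:
 *
 *	if (anon_resvmem(size, B_TRUE, zone, 0) == 0)
 *		return (ENOMEM);
 *	...
 *	anon_unresvmem(size, zone);
 */
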
/*
 * Give back an anon reservation.
 */
void
anon_unresvmem(size_t size, zone_t *zone)
{
	pgcnt_t npages = btopr(size);
	spgcnt_t mem_free_pages = 0;
	pgcnt_t phys_free_slots;
#ifdef	ANON_DEBUG
	pgcnt_t mem_resv;
#endif
	if (zone != NULL)
		rctl_decr_swap(zone, ptob(npages));

	mutex_enter(&anoninfo_lock);

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);

	/*
	 * If some of this reservation belonged to swapfs
	 * give it back to availrmem.
	 * ani_mem_resv is the amount of availrmem swapfs has reserved,
	 * but some of that memory could be locked by segspt, so we can
	 * only return the non-locked portion of ani_mem_resv to availrmem.
	 */
	if (k_anoninfo.ani_mem_resv > k_anoninfo.ani_locked_swap) {
		ANON_PRINT((A_RESV | A_MRESV),
		    ("anon_unresv: growing availrmem by %ld pages\n",
		    MIN(k_anoninfo.ani_mem_resv, npages)));

		mem_free_pages = MIN((spgcnt_t)(k_anoninfo.ani_mem_resv -
		    k_anoninfo.ani_locked_swap), npages);
		mutex_enter(&freemem_lock);
		availrmem += mem_free_pages;
		mutex_exit(&freemem_lock);
		k_anoninfo.ani_mem_resv -= mem_free_pages;

		ANI_ADD(-mem_free_pages);
	}
	/*
	 * The remainder of the pages is returned to phys swap
	 */
	ASSERT(npages >= mem_free_pages);
	phys_free_slots = npages - mem_free_pages;

	if (phys_free_slots) {
		k_anoninfo.ani_phys_resv -= phys_free_slots;
	}

#ifdef	ANON_DEBUG
	mem_resv = k_anoninfo.ani_mem_resv;
#endif

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

	mutex_exit(&anoninfo_lock);

	ANON_PRINT(A_RESV, ("anon_unresv: %lu, tot %lu, caller %p\n",
	    npages, mem_resv, (void *)caller()));
}

/*
 * Allocate an anon slot, add it to the (vp, off) hash, and return it.
 */
struct anon *
anon_alloc(struct vnode *vp, anoff_t off)
{
	struct anon	*ap;
	kmutex_t	*ahm;

	ap = kmem_cache_alloc(anon_cache, KM_SLEEP);
	if (vp == NULL) {
		swap_alloc(ap);
	} else {
		ap->an_vp = vp;
		ap->an_off = off;
	}
	ap->an_refcnt = 1;
	ap->an_pvp = NULL;
	ap->an_poff = 0;
	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);
	anon_addhash(ap);
	mutex_exit(ahm);
	ANI_ADD(-1);
	ANON_PRINT(A_ANON, ("anon_alloc: returning ap %p, vp %p\n",
	    (void *)ap, (ap ? (void *)ap->an_vp : NULL)));
	return (ap);
}

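/*
 * Illustrative lifecycle (assumed caller pattern): a fresh anonymous
 * page usually comes and goes as:
 *
 *	ap = anon_alloc(NULL, 0);	picks (vp, off) via swap_alloc()
 *	(void) anon_set_ptr(ahp, idx, ap, ANON_SLEEP);
 *	...
 *	anon_decref(ap);		frees slot and page at refcnt 0
 */
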
/*
 * Called for pages locked in memory via softlock/pagelock/mlock to make sure
 * such pages don't consume any physical swap resources needed for swapping
 * unlocked pages.
 */
void
anon_swap_free(struct anon *ap, page_t *pp)
{
	kmutex_t *ahm;

	ASSERT(ap != NULL);
	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(IS_SWAPFSVP(pp->p_vnode));
	ASSERT(ap->an_refcnt != 0);
	ASSERT(pp->p_vnode == ap->an_vp);
	ASSERT(pp->p_offset == ap->an_off);

	if (ap->an_pvp == NULL)
		return;

	page_io_lock(pp);
	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);

	ASSERT(ap->an_refcnt != 0);
	ASSERT(pp->p_vnode == ap->an_vp);
	ASSERT(pp->p_offset == ap->an_off);

	if (ap->an_pvp != NULL) {
		swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
		ap->an_pvp = NULL;
		ap->an_poff = 0;
		mutex_exit(ahm);
		hat_setmod(pp);
	} else {
		mutex_exit(ahm);
	}
	page_io_unlock(pp);
}

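/*
 * Note on anon_swap_free() above: once the physical swap slot is
 * freed, the in-memory page is the only remaining copy, so
 * hat_setmod() marks it modified to guarantee it gets written to a
 * newly allocated slot if it is ever paged out again.
 */
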
/*
 * Decrement the reference count of an anon page.
 * If the reference count goes to zero, free it and
 * its associated page (if any).
 */
void
anon_decref(struct anon *ap)
{
	page_t *pp;
	struct vnode *vp;
	anoff_t off;
	kmutex_t *ahm;

	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);
	ASSERT(ap->an_refcnt != 0);
	if (ap->an_refcnt == 0)
		panic("anon_decref: slot count 0");
	if (--ap->an_refcnt == 0) {
		swap_xlate(ap, &vp, &off);
		anon_rmhash(ap);
		if (ap->an_pvp != NULL)
			swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
		mutex_exit(ahm);

		/*
		 * If there is a page for this anon slot we will need to
		 * call VN_DISPOSE to get rid of the vp association and
		 * put the page back on the free list as really free.
		 * Acquire the "exclusive" lock to ensure that any
		 * pending i/o always completes before the swap slot
		 * is freed.
		 */
		pp = page_lookup(vp, (u_offset_t)off, SE_EXCL);
		if (pp != NULL) {
			/*LINTED: constant in conditional context */
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		}
		ANON_PRINT(A_ANON, ("anon_decref: free ap %p, vp %p\n",
		    (void *)ap, (void *)ap->an_vp));

		kmem_cache_free(anon_cache, ap);

		ANI_ADD(1);
	} else {
		mutex_exit(ahm);
	}
}


/*
 * Check an_refcnt of the root anon slot (anon_index argument is aligned at
 * seg->s_szc level) to determine whether COW processing is required.
 * anonpages_hash_lock[] held on the root ap ensures that if root's
 * refcnt is 1 all other refcnt's are 1 as well (and they can't increase
 * later since this process can't fork while its AS lock is held).
 *
 * returns 1 if the root anon slot has a refcnt > 1, otherwise returns 0.
 */
int
anon_szcshare(struct anon_hdr *ahp, ulong_t anon_index)
{
	struct anon	*ap;
	kmutex_t	*ahmpages = NULL;

	ap = anon_get_ptr(ahp, anon_index);
	if (ap == NULL)
		return (0);

	ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahmpages);
	ASSERT(ap->an_refcnt >= 1);
	if (ap->an_refcnt == 1) {
		mutex_exit(ahmpages);
		return (0);
	}
	mutex_exit(ahmpages);
	return (1);
}

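/*
 * Illustrative COW check (assumed caller pattern): a fault handler
 * working at szc-aligned granularity can decide whether it must copy
 * by probing only the root slot:
 *
 *	if (anon_szcshare(amp->ahp, P2ALIGN(an_idx, pgcnt)))
 *		...pages are shared with another process: copy...
 */
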
/*
 * Check 'nslots' anon slots for refcnt > 1.
 *
 * returns 1 if any of the 'nslots' anon slots has a refcnt > 1,
 * otherwise returns 0.
 */
static int
anon_share(struct anon_hdr *ahp, ulong_t anon_index, pgcnt_t nslots)
{
	struct anon *ap;

	while (nslots-- > 0) {
		if ((ap = anon_get_ptr(ahp, anon_index)) != NULL &&
		    ap->an_refcnt > 1)
			return (1);
		anon_index++;
	}

	return (0);
}

static void
anon_decref_pages(
	struct anon_hdr *ahp,
	ulong_t an_idx,
	uint_t szc)
{
	struct anon *ap = anon_get_ptr(ahp, an_idx);
	kmutex_t *ahmpages = NULL;
	page_t *pp;
	pgcnt_t pgcnt = page_get_pagecnt(szc);
	pgcnt_t i;
	struct vnode *vp;
	anoff_t off;
	kmutex_t *ahm;
#ifdef DEBUG
	int refcnt = 1;
#endif

	ASSERT(szc != 0);
	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
	ASSERT(IS_P2ALIGNED(an_idx, pgcnt));
	ASSERT(an_idx < ahp->size);

	if (ahp->size - an_idx < pgcnt) {
		/*
		 * In the case of shared mappings the total anon map size
		 * may not be aligned to the largest page size.
		 */
		pgcnt = ahp->size - an_idx;
	}

	VM_STAT_ADD(anonvmstats.decrefpages[0]);

	if (ap != NULL) {
		ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
		mutex_enter(ahmpages);
		ASSERT((refcnt = ap->an_refcnt) != 0);
		VM_STAT_ADD(anonvmstats.decrefpages[1]);
		if (ap->an_refcnt == 1) {
			VM_STAT_ADD(anonvmstats.decrefpages[2]);
			ASSERT(!anon_share(ahp, an_idx, pgcnt));
			mutex_exit(ahmpages);
			ahmpages = NULL;
		}
	}

	i = 0;
	while (i < pgcnt) {
		if ((ap = anon_get_ptr(ahp, an_idx + i)) == NULL) {
			ASSERT(refcnt == 1 && ahmpages == NULL);
			i++;
			continue;
		}
		ASSERT(ap->an_refcnt == refcnt);
		ASSERT(ahmpages != NULL || ap->an_refcnt == 1);
		ASSERT(ahmpages == NULL || ap->an_refcnt > 1);

		if (ahmpages == NULL) {
			swap_xlate(ap, &vp, &off);
			pp = page_lookup(vp, (u_offset_t)off, SE_EXCL);
			if (pp == NULL || pp->p_szc == 0) {
				VM_STAT_ADD(anonvmstats.decrefpages[3]);
				ahm = AH_MUTEX(ap->an_vp, ap->an_off);
anon_set_ptr(ahp, an_idx + i, NULL, 12090Sstevel@tonic-gate ANON_SLEEP); 12100Sstevel@tonic-gate mutex_enter(ahm); 12110Sstevel@tonic-gate ap->an_refcnt--; 12120Sstevel@tonic-gate ASSERT(ap->an_refcnt == 0); 12130Sstevel@tonic-gate anon_rmhash(ap); 12140Sstevel@tonic-gate if (ap->an_pvp) 12150Sstevel@tonic-gate swap_phys_free(ap->an_pvp, ap->an_poff, 12160Sstevel@tonic-gate PAGESIZE); 12170Sstevel@tonic-gate mutex_exit(ahm); 12185665Sstans if (pp == NULL) { 12195665Sstans pp = page_lookup(vp, (u_offset_t)off, 12205665Sstans SE_EXCL); 12215665Sstans ASSERT(pp == NULL || pp->p_szc == 0); 12225665Sstans } 12230Sstevel@tonic-gate if (pp != NULL) { 12240Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.decrefpages[4]); 12250Sstevel@tonic-gate /*LINTED*/ 12260Sstevel@tonic-gate VN_DISPOSE(pp, B_INVAL, 0, kcred); 12270Sstevel@tonic-gate } 12280Sstevel@tonic-gate kmem_cache_free(anon_cache, ap); 12290Sstevel@tonic-gate ANI_ADD(1); 12300Sstevel@tonic-gate i++; 12310Sstevel@tonic-gate } else { 12320Sstevel@tonic-gate pgcnt_t j; 12330Sstevel@tonic-gate pgcnt_t curpgcnt = 12340Sstevel@tonic-gate page_get_pagecnt(pp->p_szc); 12350Sstevel@tonic-gate size_t ppasize = curpgcnt * sizeof (page_t *); 12360Sstevel@tonic-gate page_t **ppa = kmem_alloc(ppasize, KM_SLEEP); 12370Sstevel@tonic-gate int dispose = 0; 12380Sstevel@tonic-gate 12390Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.decrefpages[5]); 12400Sstevel@tonic-gate 12410Sstevel@tonic-gate ASSERT(pp->p_szc <= szc); 12420Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(curpgcnt, curpgcnt)); 12430Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(i, curpgcnt)); 12440Sstevel@tonic-gate ASSERT(i + curpgcnt <= pgcnt); 12450Sstevel@tonic-gate ASSERT(!(page_pptonum(pp) & (curpgcnt - 1))); 12460Sstevel@tonic-gate ppa[0] = pp; 12470Sstevel@tonic-gate for (j = i + 1; j < i + curpgcnt; j++) { 12480Sstevel@tonic-gate ap = anon_get_ptr(ahp, an_idx + j); 12490Sstevel@tonic-gate ASSERT(ap != NULL && 12500Sstevel@tonic-gate ap->an_refcnt == 1); 12510Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 12520Sstevel@tonic-gate pp = page_lookup(vp, (u_offset_t)off, 12530Sstevel@tonic-gate SE_EXCL); 12540Sstevel@tonic-gate if (pp == NULL) 12550Sstevel@tonic-gate panic("anon_decref_pages: " 12560Sstevel@tonic-gate "no page"); 12570Sstevel@tonic-gate 12580Sstevel@tonic-gate (void) hat_pageunload(pp, 12590Sstevel@tonic-gate HAT_FORCE_PGUNLOAD); 12600Sstevel@tonic-gate ASSERT(pp->p_szc == ppa[0]->p_szc); 12610Sstevel@tonic-gate ASSERT(page_pptonum(pp) - 1 == 12620Sstevel@tonic-gate page_pptonum(ppa[j - i - 1])); 12630Sstevel@tonic-gate ppa[j - i] = pp; 12640Sstevel@tonic-gate if (ap->an_pvp != NULL && 12650Sstevel@tonic-gate !vn_matchopval(ap->an_pvp, 12665466Skchow VOPNAME_DISPOSE, 12675466Skchow (fs_generic_func_p)fs_dispose)) 12680Sstevel@tonic-gate dispose = 1; 12690Sstevel@tonic-gate } 12700Sstevel@tonic-gate for (j = i; j < i + curpgcnt; j++) { 12710Sstevel@tonic-gate ap = anon_get_ptr(ahp, an_idx + j); 12720Sstevel@tonic-gate ASSERT(ap != NULL && 12730Sstevel@tonic-gate ap->an_refcnt == 1); 127412173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(ap->an_vp, ap->an_off); 12750Sstevel@tonic-gate (void) anon_set_ptr(ahp, an_idx + j, 12760Sstevel@tonic-gate NULL, ANON_SLEEP); 12770Sstevel@tonic-gate mutex_enter(ahm); 12780Sstevel@tonic-gate ap->an_refcnt--; 12790Sstevel@tonic-gate ASSERT(ap->an_refcnt == 0); 12800Sstevel@tonic-gate anon_rmhash(ap); 12810Sstevel@tonic-gate if (ap->an_pvp) 12820Sstevel@tonic-gate swap_phys_free(ap->an_pvp, 12835466Skchow ap->an_poff, PAGESIZE); 12840Sstevel@tonic-gate mutex_exit(ahm); 
12850Sstevel@tonic-gate kmem_cache_free(anon_cache, ap); 12860Sstevel@tonic-gate ANI_ADD(1); 12870Sstevel@tonic-gate } 12885665Sstans if (!dispose) { 12895665Sstans VM_STAT_ADD(anonvmstats.decrefpages[6]); 12905665Sstans page_destroy_pages(ppa[0]); 12915665Sstans } else { 12925665Sstans VM_STAT_ADD(anonvmstats.decrefpages[7]); 12935665Sstans for (j = 0; j < curpgcnt; j++) { 12945665Sstans ASSERT(PAGE_EXCL(ppa[j])); 12955665Sstans ppa[j]->p_szc = 0; 12965665Sstans } 12975665Sstans for (j = 0; j < curpgcnt; j++) { 12985665Sstans ASSERT(!hat_page_is_mapped( 12995665Sstans ppa[j])); 13005665Sstans /*LINTED*/ 13015665Sstans VN_DISPOSE(ppa[j], B_INVAL, 0, 13025665Sstans kcred); 13035665Sstans } 13045665Sstans } 13055665Sstans kmem_free(ppa, ppasize); 13060Sstevel@tonic-gate i += curpgcnt; 13070Sstevel@tonic-gate } 13080Sstevel@tonic-gate } else { 13090Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.decrefpages[8]); 13100Sstevel@tonic-gate (void) anon_set_ptr(ahp, an_idx + i, NULL, ANON_SLEEP); 131112173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(ap->an_vp, ap->an_off); 13120Sstevel@tonic-gate mutex_enter(ahm); 13130Sstevel@tonic-gate ap->an_refcnt--; 13140Sstevel@tonic-gate mutex_exit(ahm); 13150Sstevel@tonic-gate i++; 13160Sstevel@tonic-gate } 13170Sstevel@tonic-gate } 13180Sstevel@tonic-gate 13190Sstevel@tonic-gate if (ahmpages != NULL) { 13200Sstevel@tonic-gate mutex_exit(ahmpages); 13210Sstevel@tonic-gate } 13220Sstevel@tonic-gate } 13230Sstevel@tonic-gate 13240Sstevel@tonic-gate /* 13250Sstevel@tonic-gate * Duplicate references to size bytes worth of anon pages. 13260Sstevel@tonic-gate * Used when duplicating a segment that contains private anon pages. 13270Sstevel@tonic-gate * This code assumes that the procedure calling this one has already used 13280Sstevel@tonic-gate * hat_chgprot() to disable write access to the range of addresses 13290Sstevel@tonic-gate * that *old actually refers to.
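 *
 * A minimal usage sketch (illustrative only; oldamp, newamp and the
 * index and size values are hypothetical caller state, and the
 * hat_chgprot() call shows the write-protection step mentioned above):
 *
 *	hat_chgprot(seg->s_as->a_hat, addr, size, prot & ~PROT_WRITE);
 *	anon_dup(oldamp->ahp, old_idx, newamp->ahp, new_idx, size);
 *
 * Afterwards both arrays reference the same anon structs with an_refcnt
 * of at least 2, and the first write on either side is privatized by a
 * COW fault.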
13300Sstevel@tonic-gate */ 13310Sstevel@tonic-gate void 13320Sstevel@tonic-gate anon_dup(struct anon_hdr *old, ulong_t old_idx, struct anon_hdr *new, 13330Sstevel@tonic-gate ulong_t new_idx, size_t size) 13340Sstevel@tonic-gate { 13350Sstevel@tonic-gate spgcnt_t npages; 13360Sstevel@tonic-gate kmutex_t *ahm; 13370Sstevel@tonic-gate struct anon *ap; 13380Sstevel@tonic-gate ulong_t off; 13390Sstevel@tonic-gate ulong_t index; 13400Sstevel@tonic-gate 13410Sstevel@tonic-gate npages = btopr(size); 13420Sstevel@tonic-gate while (npages > 0) { 13430Sstevel@tonic-gate index = old_idx; 13440Sstevel@tonic-gate if ((ap = anon_get_next_ptr(old, &index)) == NULL) 13450Sstevel@tonic-gate break; 13460Sstevel@tonic-gate 13470Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(old, index))); 13480Sstevel@tonic-gate off = index - old_idx; 13490Sstevel@tonic-gate npages -= off; 13500Sstevel@tonic-gate if (npages <= 0) 13510Sstevel@tonic-gate break; 13520Sstevel@tonic-gate 13530Sstevel@tonic-gate (void) anon_set_ptr(new, new_idx + off, ap, ANON_SLEEP); 135412173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(ap->an_vp, ap->an_off); 13550Sstevel@tonic-gate 13560Sstevel@tonic-gate mutex_enter(ahm); 13570Sstevel@tonic-gate ap->an_refcnt++; 13580Sstevel@tonic-gate mutex_exit(ahm); 13590Sstevel@tonic-gate 13600Sstevel@tonic-gate off++; 13610Sstevel@tonic-gate new_idx += off; 13620Sstevel@tonic-gate old_idx += off; 13630Sstevel@tonic-gate npages--; 13640Sstevel@tonic-gate } 13650Sstevel@tonic-gate } 13660Sstevel@tonic-gate 13670Sstevel@tonic-gate /* 13680Sstevel@tonic-gate * Just like anon_dup but also guarantees there are no holes (unallocated anon 13690Sstevel@tonic-gate * slots) within any large page region. That means if a large page region is 13700Sstevel@tonic-gate * empty in the old array it is skipped. If there is at least one valid slot 13710Sstevel@tonic-gate * in a large page region of the old array, any unallocated slots in that 13720Sstevel@tonic-gate * region are filled in and the slots are also copied to the new array. If 13730Sstevel@tonic-gate * noalloc is 1, each large page region should either have no valid anon 13740Sstevel@tonic-gate * slots or all slots should be valid.
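 *
 * A hedged usage sketch (the amp pointers, indices and size are
 * hypothetical caller state):
 *
 *	anon_dup_fill_holes(oldamp->ahp, old_idx, newamp->ahp, new_idx,
 *	    size, seg->s_szc, 0);
 *
 * With noalloc set to 0, a hole inside a partially populated large page
 * region of the old array is plugged with a freshly allocated anon slot
 * before the region is copied to the new array.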
13750Sstevel@tonic-gate */ 13760Sstevel@tonic-gate void 13770Sstevel@tonic-gate anon_dup_fill_holes( 13780Sstevel@tonic-gate struct anon_hdr *old, 13790Sstevel@tonic-gate ulong_t old_idx, 13800Sstevel@tonic-gate struct anon_hdr *new, 13810Sstevel@tonic-gate ulong_t new_idx, 13820Sstevel@tonic-gate size_t size, 13830Sstevel@tonic-gate uint_t szc, 13840Sstevel@tonic-gate int noalloc) 13850Sstevel@tonic-gate { 13860Sstevel@tonic-gate struct anon *ap; 13870Sstevel@tonic-gate spgcnt_t npages; 13880Sstevel@tonic-gate kmutex_t *ahm, *ahmpages = NULL; 13890Sstevel@tonic-gate pgcnt_t pgcnt, i; 13900Sstevel@tonic-gate ulong_t index, off; 13910Sstevel@tonic-gate #ifdef DEBUG 13920Sstevel@tonic-gate int refcnt; 13930Sstevel@tonic-gate #endif 13940Sstevel@tonic-gate 13950Sstevel@tonic-gate ASSERT(szc != 0); 13960Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc); 13970Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 13980Sstevel@tonic-gate npages = btopr(size); 13990Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(npages, pgcnt)); 14000Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(old_idx, pgcnt)); 14010Sstevel@tonic-gate 14020Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.dupfillholes[0]); 14030Sstevel@tonic-gate 14040Sstevel@tonic-gate while (npages > 0) { 14050Sstevel@tonic-gate index = old_idx; 14060Sstevel@tonic-gate 14070Sstevel@tonic-gate /* 14080Sstevel@tonic-gate * Find the next valid slot. 14090Sstevel@tonic-gate */ 14100Sstevel@tonic-gate if (anon_get_next_ptr(old, &index) == NULL) 14110Sstevel@tonic-gate break; 14120Sstevel@tonic-gate 14130Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(old, index))); 14140Sstevel@tonic-gate /* 14150Sstevel@tonic-gate * Now back index up to the beginning of the 14160Sstevel@tonic-gate * current large page region of the old array. 14170Sstevel@tonic-gate */ 14180Sstevel@tonic-gate index = P2ALIGN(index, pgcnt); 14190Sstevel@tonic-gate off = index - old_idx; 14200Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(off, pgcnt)); 14210Sstevel@tonic-gate npages -= off; 14220Sstevel@tonic-gate if (npages <= 0) 14230Sstevel@tonic-gate break; 14240Sstevel@tonic-gate 14250Sstevel@tonic-gate /* 14260Sstevel@tonic-gate * Fill and copy a large page region's worth 14270Sstevel@tonic-gate * of anon slots. 14280Sstevel@tonic-gate */ 14290Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 14300Sstevel@tonic-gate if ((ap = anon_get_ptr(old, index + i)) == NULL) { 14310Sstevel@tonic-gate if (noalloc) { 14320Sstevel@tonic-gate panic("anon_dup_fill_holes: " 14330Sstevel@tonic-gate "empty anon slot\n"); 14340Sstevel@tonic-gate } 14350Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.dupfillholes[1]); 14360Sstevel@tonic-gate ap = anon_alloc(NULL, 0); 14370Sstevel@tonic-gate (void) anon_set_ptr(old, index + i, ap, 14380Sstevel@tonic-gate ANON_SLEEP); 14390Sstevel@tonic-gate } else if (i == 0) { 14400Sstevel@tonic-gate /* 14410Sstevel@tonic-gate * make the increment of all refcnts of all 14420Sstevel@tonic-gate * anon slots of a large page appear atomic by 14430Sstevel@tonic-gate * getting an anonpages_hash_lock for the 14440Sstevel@tonic-gate * first anon slot of a large page.
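 *
 * In outline, the locking pattern is (a sketch only; the real code
 * follows below):
 *
 *	ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
 *	mutex_enter(ahmpages);
 *	for each slot: bump an_refcnt under its own AH_MUTEX;
 *	mutex_exit(ahmpages);
 *
 * so a concurrent anon_decref_pages() on the same region observes
 * either all of the refcnt increments or none of them.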
14450Sstevel@tonic-gate */ 14460Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.dupfillholes[2]); 14470Sstevel@tonic-gate 144812173SMichael.Corcoran@Sun.COM ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); 14490Sstevel@tonic-gate mutex_enter(ahmpages); 14500Sstevel@tonic-gate /*LINTED*/ 14510Sstevel@tonic-gate ASSERT(refcnt = ap->an_refcnt); 14520Sstevel@tonic-gate 14530Sstevel@tonic-gate VM_STAT_COND_ADD(ap->an_refcnt > 1, 14540Sstevel@tonic-gate anonvmstats.dupfillholes[3]); 14550Sstevel@tonic-gate } 14560Sstevel@tonic-gate (void) anon_set_ptr(new, new_idx + off + i, ap, 14570Sstevel@tonic-gate ANON_SLEEP); 145812173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(ap->an_vp, ap->an_off); 14590Sstevel@tonic-gate mutex_enter(ahm); 14600Sstevel@tonic-gate ASSERT(ahmpages != NULL || ap->an_refcnt == 1); 14610Sstevel@tonic-gate ASSERT(i == 0 || ahmpages == NULL || 14620Sstevel@tonic-gate refcnt == ap->an_refcnt); 14630Sstevel@tonic-gate ap->an_refcnt++; 14640Sstevel@tonic-gate mutex_exit(ahm); 14650Sstevel@tonic-gate } 14660Sstevel@tonic-gate if (ahmpages != NULL) { 14670Sstevel@tonic-gate mutex_exit(ahmpages); 14680Sstevel@tonic-gate ahmpages = NULL; 14690Sstevel@tonic-gate } 14700Sstevel@tonic-gate off += pgcnt; 14710Sstevel@tonic-gate new_idx += off; 14720Sstevel@tonic-gate old_idx += off; 14730Sstevel@tonic-gate npages -= pgcnt; 14740Sstevel@tonic-gate } 14750Sstevel@tonic-gate } 14760Sstevel@tonic-gate 14770Sstevel@tonic-gate /* 14780Sstevel@tonic-gate * Used when a segment with a vnode changes szc. Similarly to 14790Sstevel@tonic-gate * anon_dup_fill_holes() it makes sure each large page region either has no 14800Sstevel@tonic-gate * anon slots or all of them, but new slots are created by COWing the file 14810Sstevel@tonic-gate * pages. On entry no anon slots should be shared. 14820Sstevel@tonic-gate */ 14830Sstevel@tonic-gate int 14840Sstevel@tonic-gate anon_fill_cow_holes( 14850Sstevel@tonic-gate struct seg *seg, 14860Sstevel@tonic-gate caddr_t addr, 14870Sstevel@tonic-gate struct anon_hdr *ahp, 14880Sstevel@tonic-gate ulong_t an_idx, 14890Sstevel@tonic-gate struct vnode *vp, 14900Sstevel@tonic-gate u_offset_t vp_off, 14910Sstevel@tonic-gate size_t size, 14920Sstevel@tonic-gate uint_t szc, 14930Sstevel@tonic-gate uint_t prot, 14940Sstevel@tonic-gate struct vpage vpage[], 14950Sstevel@tonic-gate struct cred *cred) 14960Sstevel@tonic-gate { 14970Sstevel@tonic-gate struct anon *ap; 14980Sstevel@tonic-gate spgcnt_t npages; 14990Sstevel@tonic-gate pgcnt_t pgcnt, i; 15000Sstevel@tonic-gate ulong_t index, off; 15010Sstevel@tonic-gate int err = 0; 15020Sstevel@tonic-gate int pageflags = 0; 15030Sstevel@tonic-gate 15040Sstevel@tonic-gate ASSERT(szc != 0); 15050Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc); 15060Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 15070Sstevel@tonic-gate npages = btopr(size); 15080Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(npages, pgcnt)); 15090Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(an_idx, pgcnt)); 15100Sstevel@tonic-gate 15110Sstevel@tonic-gate while (npages > 0) { 15120Sstevel@tonic-gate index = an_idx; 15130Sstevel@tonic-gate 15140Sstevel@tonic-gate /* 15150Sstevel@tonic-gate * Find the next valid slot.
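 *
 * For instance (illustrative values only): with slots
 * { NULL, NULL, ap2, ap3, ... } and index == an_idx on entry,
 * anon_get_next_ptr() returns ap2 and advances index by 2.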
15160Sstevel@tonic-gate */ 15170Sstevel@tonic-gate if (anon_get_next_ptr(ahp, &index) == NULL) { 15180Sstevel@tonic-gate break; 15190Sstevel@tonic-gate } 15200Sstevel@tonic-gate 15210Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index))); 15220Sstevel@tonic-gate /* 15230Sstevel@tonic-gate * Now back index up to the beginning of the 15240Sstevel@tonic-gate * current large page region of the anon array. 15250Sstevel@tonic-gate */ 15260Sstevel@tonic-gate index = P2ALIGN(index, pgcnt); 15270Sstevel@tonic-gate off = index - an_idx; 15280Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(off, pgcnt)); 15290Sstevel@tonic-gate npages -= off; 15300Sstevel@tonic-gate if (npages <= 0) 15310Sstevel@tonic-gate break; 15320Sstevel@tonic-gate an_idx += off; 15330Sstevel@tonic-gate vp_off += ptob(off); 15340Sstevel@tonic-gate addr += ptob(off); 15350Sstevel@tonic-gate if (vpage != NULL) { 15360Sstevel@tonic-gate vpage += off; 15370Sstevel@tonic-gate } 15380Sstevel@tonic-gate 15390Sstevel@tonic-gate for (i = 0; i < pgcnt; i++, an_idx++, vp_off += PAGESIZE) { 15400Sstevel@tonic-gate if ((ap = anon_get_ptr(ahp, an_idx)) == NULL) { 15410Sstevel@tonic-gate page_t *pl[1 + 1]; 15420Sstevel@tonic-gate page_t *pp; 15430Sstevel@tonic-gate 15440Sstevel@tonic-gate err = VOP_GETPAGE(vp, vp_off, PAGESIZE, NULL, 15455331Samw pl, PAGESIZE, seg, addr, S_READ, cred, 15465331Samw NULL); 15470Sstevel@tonic-gate if (err) { 15480Sstevel@tonic-gate break; 15490Sstevel@tonic-gate } 15500Sstevel@tonic-gate if (vpage != NULL) { 15510Sstevel@tonic-gate prot = VPP_PROT(vpage); 15520Sstevel@tonic-gate pageflags = VPP_ISPPLOCK(vpage) ? 15530Sstevel@tonic-gate LOCK_PAGE : 0; 15540Sstevel@tonic-gate } 15550Sstevel@tonic-gate pp = anon_private(&ap, seg, addr, prot, pl[0], 15565466Skchow pageflags, cred); 15570Sstevel@tonic-gate if (pp == NULL) { 15580Sstevel@tonic-gate err = ENOMEM; 15590Sstevel@tonic-gate break; 15600Sstevel@tonic-gate } 15610Sstevel@tonic-gate (void) anon_set_ptr(ahp, an_idx, ap, 15620Sstevel@tonic-gate ANON_SLEEP); 15630Sstevel@tonic-gate page_unlock(pp); 15640Sstevel@tonic-gate } 15650Sstevel@tonic-gate ASSERT(ap->an_refcnt == 1); 15660Sstevel@tonic-gate addr += PAGESIZE; 15670Sstevel@tonic-gate if (vpage != NULL) { 15680Sstevel@tonic-gate vpage++; 15690Sstevel@tonic-gate } 15700Sstevel@tonic-gate } 15710Sstevel@tonic-gate npages -= pgcnt; 15720Sstevel@tonic-gate } 15730Sstevel@tonic-gate 15740Sstevel@tonic-gate return (err); 15750Sstevel@tonic-gate } 15760Sstevel@tonic-gate 15770Sstevel@tonic-gate /* 15780Sstevel@tonic-gate * Free a group of "size" anon pages, size in bytes, 15790Sstevel@tonic-gate * and clear out the pointers to the anon entries.
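 *
 * Typical use when tearing down a private mapping (an illustrative
 * sketch; amp, index and size stand for hypothetical caller state):
 *
 *	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
 *	anon_free(amp->ahp, index, size);
 *	ANON_LOCK_EXIT(&amp->a_rwlock);
 *
 * Each non-NULL slot in the range is cleared and anon_decref()'d; the
 * backing page and swap slot go away with the last reference.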
15800Sstevel@tonic-gate */ 15810Sstevel@tonic-gate void 15820Sstevel@tonic-gate anon_free(struct anon_hdr *ahp, ulong_t index, size_t size) 15830Sstevel@tonic-gate { 15840Sstevel@tonic-gate spgcnt_t npages; 15850Sstevel@tonic-gate struct anon *ap; 15860Sstevel@tonic-gate ulong_t old; 15870Sstevel@tonic-gate 15880Sstevel@tonic-gate npages = btopr(size); 15890Sstevel@tonic-gate 15900Sstevel@tonic-gate while (npages > 0) { 15910Sstevel@tonic-gate old = index; 15920Sstevel@tonic-gate if ((ap = anon_get_next_ptr(ahp, &index)) == NULL) 15930Sstevel@tonic-gate break; 15940Sstevel@tonic-gate 15950Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index))); 15960Sstevel@tonic-gate npages -= index - old; 15970Sstevel@tonic-gate if (npages <= 0) 15980Sstevel@tonic-gate break; 15990Sstevel@tonic-gate 16000Sstevel@tonic-gate (void) anon_set_ptr(ahp, index, NULL, ANON_SLEEP); 16010Sstevel@tonic-gate anon_decref(ap); 16020Sstevel@tonic-gate /* 16030Sstevel@tonic-gate * Bump index and decrement page count 16040Sstevel@tonic-gate */ 16050Sstevel@tonic-gate index++; 16060Sstevel@tonic-gate npages--; 16070Sstevel@tonic-gate } 16080Sstevel@tonic-gate } 16090Sstevel@tonic-gate 16100Sstevel@tonic-gate void 16110Sstevel@tonic-gate anon_free_pages( 16120Sstevel@tonic-gate struct anon_hdr *ahp, 16130Sstevel@tonic-gate ulong_t an_idx, 16140Sstevel@tonic-gate size_t size, 16150Sstevel@tonic-gate uint_t szc) 16160Sstevel@tonic-gate { 16170Sstevel@tonic-gate spgcnt_t npages; 16180Sstevel@tonic-gate pgcnt_t pgcnt; 16190Sstevel@tonic-gate ulong_t index, off; 16200Sstevel@tonic-gate 16210Sstevel@tonic-gate ASSERT(szc != 0); 16220Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc); 16230Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 16240Sstevel@tonic-gate npages = btopr(size); 16250Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(npages, pgcnt)); 16260Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(an_idx, pgcnt)); 16272414Saguzovsk ASSERT(an_idx < ahp->size); 16280Sstevel@tonic-gate 16290Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.freepages[0]); 16300Sstevel@tonic-gate 16310Sstevel@tonic-gate while (npages > 0) { 16320Sstevel@tonic-gate index = an_idx; 16330Sstevel@tonic-gate 16340Sstevel@tonic-gate /* 16350Sstevel@tonic-gate * Find the next valid slot. 16360Sstevel@tonic-gate */ 16370Sstevel@tonic-gate if (anon_get_next_ptr(ahp, &index) == NULL) 16380Sstevel@tonic-gate break; 16390Sstevel@tonic-gate 16400Sstevel@tonic-gate ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index))); 16410Sstevel@tonic-gate /* 16420Sstevel@tonic-gate * Now back index up to the beginning of the 16430Sstevel@tonic-gate * current large page region of the old array.
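 *
 * For example (illustrative numbers): with pgcnt == 8, an_idx == 8 and
 * the first valid slot found at index 21, the index is backed up to
 * P2ALIGN(21, 8) == 16, off becomes 8, and the whole region starting
 * at slot 16 is handed to anon_decref_pages().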
16440Sstevel@tonic-gate */ 16450Sstevel@tonic-gate index = P2ALIGN(index, pgcnt); 16460Sstevel@tonic-gate off = index - an_idx; 16470Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(off, pgcnt)); 16480Sstevel@tonic-gate npages -= off; 16490Sstevel@tonic-gate if (npages <= 0) 16500Sstevel@tonic-gate break; 16510Sstevel@tonic-gate 16520Sstevel@tonic-gate anon_decref_pages(ahp, index, szc); 16530Sstevel@tonic-gate 16540Sstevel@tonic-gate off += pgcnt; 16550Sstevel@tonic-gate an_idx += off; 16560Sstevel@tonic-gate npages -= pgcnt; 16570Sstevel@tonic-gate } 16580Sstevel@tonic-gate } 16590Sstevel@tonic-gate 16600Sstevel@tonic-gate /* 16610Sstevel@tonic-gate * Make anonymous pages discardable 16620Sstevel@tonic-gate */ 16630Sstevel@tonic-gate void 16645224Smec anon_disclaim(struct anon_map *amp, ulong_t index, size_t size) 16650Sstevel@tonic-gate { 16660Sstevel@tonic-gate spgcnt_t npages = btopr(size); 16670Sstevel@tonic-gate struct anon *ap; 16680Sstevel@tonic-gate struct vnode *vp; 16690Sstevel@tonic-gate anoff_t off; 16700Sstevel@tonic-gate page_t *pp, *root_pp; 16710Sstevel@tonic-gate kmutex_t *ahm; 16720Sstevel@tonic-gate pgcnt_t pgcnt; 16730Sstevel@tonic-gate ulong_t old_idx, idx, i; 16740Sstevel@tonic-gate struct anon_hdr *ahp = amp->ahp; 16750Sstevel@tonic-gate anon_sync_obj_t cookie; 16760Sstevel@tonic-gate 16770Sstevel@tonic-gate ASSERT(RW_READ_HELD(&amp->a_rwlock)); 16780Sstevel@tonic-gate pgcnt = 1; 16795224Smec for (; npages > 0; index = (pgcnt == 1) ? index + 1 : 16805224Smec P2ROUNDUP(index + 1, pgcnt), npages -= pgcnt) { 16810Sstevel@tonic-gate 16820Sstevel@tonic-gate /* 16830Sstevel@tonic-gate * get anon pointer and index for the first valid entry 16840Sstevel@tonic-gate * in the anon list, starting from "index" 16850Sstevel@tonic-gate */ 16860Sstevel@tonic-gate old_idx = index; 16870Sstevel@tonic-gate if ((ap = anon_get_next_ptr(ahp, &index)) == NULL) 16880Sstevel@tonic-gate break; 16890Sstevel@tonic-gate 16900Sstevel@tonic-gate /* 16910Sstevel@tonic-gate * decrement npages by the number of NULL anon slots we skipped 16920Sstevel@tonic-gate */ 16930Sstevel@tonic-gate npages -= index - old_idx; 16940Sstevel@tonic-gate if (npages <= 0) 16950Sstevel@tonic-gate break; 16960Sstevel@tonic-gate 16970Sstevel@tonic-gate anon_array_enter(amp, index, &cookie); 16980Sstevel@tonic-gate ap = anon_get_ptr(ahp, index); 16990Sstevel@tonic-gate ASSERT(ap != NULL); 17000Sstevel@tonic-gate 17010Sstevel@tonic-gate /* 17020Sstevel@tonic-gate * Get anonymous page and try to lock it SE_EXCL; 17035224Smec * if we can't grab the lock we skip to the next page. 17040Sstevel@tonic-gate */ 17050Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 17065224Smec pp = page_lookup_nowait(vp, (u_offset_t)off, SE_EXCL); 17070Sstevel@tonic-gate if (pp == NULL) { 17080Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul++; 17090Sstevel@tonic-gate pgcnt = 1; 17100Sstevel@tonic-gate anon_array_exit(&cookie); 17110Sstevel@tonic-gate continue; 17120Sstevel@tonic-gate } 17130Sstevel@tonic-gate pgcnt = page_get_pagecnt(pp->p_szc); 17140Sstevel@tonic-gate 17150Sstevel@tonic-gate /* 17160Sstevel@tonic-gate * we cannot free a page which is permanently locked. 17170Sstevel@tonic-gate * The page_struct_lock need not be acquired to examine 17180Sstevel@tonic-gate * these fields since the page has an "exclusive" lock.
17190Sstevel@tonic-gate */ 17200Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 17210Sstevel@tonic-gate page_unlock(pp); 17220Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul++; 17230Sstevel@tonic-gate anon_array_exit(&cookie); 17240Sstevel@tonic-gate continue; 17250Sstevel@tonic-gate } 17260Sstevel@tonic-gate 172712173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(vp, off); 17280Sstevel@tonic-gate mutex_enter(ahm); 17290Sstevel@tonic-gate ASSERT(ap->an_refcnt != 0); 17300Sstevel@tonic-gate /* 17310Sstevel@tonic-gate * skip this one if copy-on-write is not yet broken. 17320Sstevel@tonic-gate */ 17330Sstevel@tonic-gate if (ap->an_refcnt > 1) { 17340Sstevel@tonic-gate mutex_exit(ahm); 17350Sstevel@tonic-gate page_unlock(pp); 17360Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul++; 17370Sstevel@tonic-gate anon_array_exit(&cookie); 17380Sstevel@tonic-gate continue; 17390Sstevel@tonic-gate } 17400Sstevel@tonic-gate 17410Sstevel@tonic-gate if (pp->p_szc == 0) { 17420Sstevel@tonic-gate pgcnt = 1; 17430Sstevel@tonic-gate 17440Sstevel@tonic-gate /* 17450Sstevel@tonic-gate * free swap slot; 17460Sstevel@tonic-gate */ 17470Sstevel@tonic-gate if (ap->an_pvp) { 17480Sstevel@tonic-gate swap_phys_free(ap->an_pvp, ap->an_poff, 17490Sstevel@tonic-gate PAGESIZE); 17500Sstevel@tonic-gate ap->an_pvp = NULL; 17510Sstevel@tonic-gate ap->an_poff = 0; 17520Sstevel@tonic-gate } 17530Sstevel@tonic-gate mutex_exit(ahm); 17540Sstevel@tonic-gate segadvstat.MADV_FREE_hit.value.ul++; 17550Sstevel@tonic-gate 17560Sstevel@tonic-gate /* 17570Sstevel@tonic-gate * while we are at it, unload all the translations 17580Sstevel@tonic-gate * and attempt to free the page. 17590Sstevel@tonic-gate */ 17600Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 17610Sstevel@tonic-gate /*LINTED: constant in conditional context */ 17620Sstevel@tonic-gate VN_DISPOSE(pp, B_FREE, 0, kcred); 17630Sstevel@tonic-gate anon_array_exit(&cookie); 17640Sstevel@tonic-gate continue; 17650Sstevel@tonic-gate } 17660Sstevel@tonic-gate 17670Sstevel@tonic-gate pgcnt = page_get_pagecnt(pp->p_szc); 17682414Saguzovsk if (!IS_P2ALIGNED(index, pgcnt) || npages < pgcnt) { 17690Sstevel@tonic-gate if (!page_try_demote_pages(pp)) { 17700Sstevel@tonic-gate mutex_exit(ahm); 17710Sstevel@tonic-gate page_unlock(pp); 17720Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul++; 17730Sstevel@tonic-gate anon_array_exit(&cookie); 17740Sstevel@tonic-gate continue; 17750Sstevel@tonic-gate } else { 17760Sstevel@tonic-gate pgcnt = 1; 17770Sstevel@tonic-gate if (ap->an_pvp) { 17780Sstevel@tonic-gate swap_phys_free(ap->an_pvp, 17790Sstevel@tonic-gate ap->an_poff, PAGESIZE); 17805224Smec ap->an_pvp = NULL; 17815224Smec ap->an_poff = 0; 17820Sstevel@tonic-gate } 17830Sstevel@tonic-gate mutex_exit(ahm); 17840Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 17850Sstevel@tonic-gate /*LINTED*/ 17860Sstevel@tonic-gate VN_DISPOSE(pp, B_FREE, 0, kcred); 17870Sstevel@tonic-gate segadvstat.MADV_FREE_hit.value.ul++; 17880Sstevel@tonic-gate anon_array_exit(&cookie); 17890Sstevel@tonic-gate continue; 17900Sstevel@tonic-gate } 17910Sstevel@tonic-gate } 17920Sstevel@tonic-gate mutex_exit(ahm); 17930Sstevel@tonic-gate root_pp = pp; 17940Sstevel@tonic-gate 17950Sstevel@tonic-gate /* 17960Sstevel@tonic-gate * try to lock remaining pages 17970Sstevel@tonic-gate */ 17980Sstevel@tonic-gate for (idx = 1; idx < pgcnt; idx++) { 1799414Skchow pp++; 18000Sstevel@tonic-gate if (!page_trylock(pp, SE_EXCL)) 18010Sstevel@tonic-gate break; 
18020Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 18030Sstevel@tonic-gate page_unlock(pp); 18040Sstevel@tonic-gate break; 18050Sstevel@tonic-gate } 18060Sstevel@tonic-gate } 18070Sstevel@tonic-gate 18080Sstevel@tonic-gate if (idx == pgcnt) { 18090Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 18100Sstevel@tonic-gate ap = anon_get_ptr(ahp, index + i); 18110Sstevel@tonic-gate if (ap == NULL) 18120Sstevel@tonic-gate break; 18130Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 181412173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(vp, off); 18150Sstevel@tonic-gate mutex_enter(ahm); 18160Sstevel@tonic-gate ASSERT(ap->an_refcnt != 0); 18170Sstevel@tonic-gate 18180Sstevel@tonic-gate /* 18190Sstevel@tonic-gate * skip this one if copy-on-write 18200Sstevel@tonic-gate * is not yet broken. 18210Sstevel@tonic-gate */ 18220Sstevel@tonic-gate if (ap->an_refcnt > 1) { 18230Sstevel@tonic-gate mutex_exit(ahm); 18240Sstevel@tonic-gate goto skiplp; 18250Sstevel@tonic-gate } 18260Sstevel@tonic-gate if (ap->an_pvp) { 18270Sstevel@tonic-gate swap_phys_free(ap->an_pvp, 18280Sstevel@tonic-gate ap->an_poff, PAGESIZE); 18295224Smec ap->an_pvp = NULL; 18305224Smec ap->an_poff = 0; 18310Sstevel@tonic-gate } 18320Sstevel@tonic-gate mutex_exit(ahm); 18330Sstevel@tonic-gate } 18340Sstevel@tonic-gate page_destroy_pages(root_pp); 18350Sstevel@tonic-gate segadvstat.MADV_FREE_hit.value.ul += pgcnt; 18360Sstevel@tonic-gate anon_array_exit(&cookie); 18370Sstevel@tonic-gate continue; 18380Sstevel@tonic-gate } 18390Sstevel@tonic-gate skiplp: 18400Sstevel@tonic-gate segadvstat.MADV_FREE_miss.value.ul += pgcnt; 1841414Skchow for (i = 0, pp = root_pp; i < idx; pp++, i++) 18420Sstevel@tonic-gate page_unlock(pp); 18430Sstevel@tonic-gate anon_array_exit(&cookie); 18440Sstevel@tonic-gate } 18450Sstevel@tonic-gate } 18460Sstevel@tonic-gate 18470Sstevel@tonic-gate /* 18480Sstevel@tonic-gate * Return the kept page(s) and protections to the segment driver. 18490Sstevel@tonic-gate */ 18500Sstevel@tonic-gate int 18510Sstevel@tonic-gate anon_getpage( 18520Sstevel@tonic-gate struct anon **app, 18530Sstevel@tonic-gate uint_t *protp, 18540Sstevel@tonic-gate page_t *pl[], 18550Sstevel@tonic-gate size_t plsz, 18560Sstevel@tonic-gate struct seg *seg, 18570Sstevel@tonic-gate caddr_t addr, 18580Sstevel@tonic-gate enum seg_rw rw, 18590Sstevel@tonic-gate struct cred *cred) 18600Sstevel@tonic-gate { 18610Sstevel@tonic-gate page_t *pp; 18620Sstevel@tonic-gate struct anon *ap = *app; 18630Sstevel@tonic-gate struct vnode *vp; 18640Sstevel@tonic-gate anoff_t off; 18650Sstevel@tonic-gate int err; 18660Sstevel@tonic-gate kmutex_t *ahm; 18670Sstevel@tonic-gate 18680Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 18690Sstevel@tonic-gate 18700Sstevel@tonic-gate /* 18710Sstevel@tonic-gate * Look up the page. If the page is being paged in, 18720Sstevel@tonic-gate * wait for it to finish as we must return a list of 18730Sstevel@tonic-gate * pages since this routine acts like the VOP_GETPAGE 18740Sstevel@tonic-gate * routine does.
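 *
 * A caller-side sketch (illustrative only; the local variables are
 * hypothetical):
 *
 *	page_t *pl[2];
 *	uint_t prot;
 *
 *	err = anon_getpage(&ap, &prot, pl, PAGESIZE, seg, addr,
 *	    S_READ, cred);
 *
 * On success pl[0] holds the page and prot has PROT_WRITE stripped
 * whenever the anon slot is still shared (an_refcnt > 1).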
18750Sstevel@tonic-gate */ 18760Sstevel@tonic-gate if (pl != NULL && (pp = page_lookup(vp, (u_offset_t)off, SE_SHARED))) { 187712173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(ap->an_vp, ap->an_off); 18780Sstevel@tonic-gate mutex_enter(ahm); 18790Sstevel@tonic-gate if (ap->an_refcnt == 1) 18800Sstevel@tonic-gate *protp = PROT_ALL; 18810Sstevel@tonic-gate else 18820Sstevel@tonic-gate *protp = PROT_ALL & ~PROT_WRITE; 18830Sstevel@tonic-gate mutex_exit(ahm); 18840Sstevel@tonic-gate pl[0] = pp; 18850Sstevel@tonic-gate pl[1] = NULL; 18860Sstevel@tonic-gate return (0); 18870Sstevel@tonic-gate } 18880Sstevel@tonic-gate 18890Sstevel@tonic-gate /* 18900Sstevel@tonic-gate * Simply treat it as a vnode fault on the anon vp. 18910Sstevel@tonic-gate */ 18920Sstevel@tonic-gate 18930Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_ANON_GETPAGE, 18945466Skchow "anon_getpage:seg %x addr %x vp %x", 18955466Skchow seg, addr, vp); 18960Sstevel@tonic-gate 18970Sstevel@tonic-gate err = VOP_GETPAGE(vp, (u_offset_t)off, PAGESIZE, protp, pl, plsz, 18985331Samw seg, addr, rw, cred, NULL); 18990Sstevel@tonic-gate 19000Sstevel@tonic-gate if (err == 0 && pl != NULL) { 190112173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(ap->an_vp, ap->an_off); 19020Sstevel@tonic-gate mutex_enter(ahm); 19030Sstevel@tonic-gate if (ap->an_refcnt != 1) 19040Sstevel@tonic-gate *protp &= ~PROT_WRITE; /* make read-only */ 19050Sstevel@tonic-gate mutex_exit(ahm); 19060Sstevel@tonic-gate } 19070Sstevel@tonic-gate return (err); 19080Sstevel@tonic-gate } 19090Sstevel@tonic-gate 19100Sstevel@tonic-gate /* 19110Sstevel@tonic-gate * Creates or returns kept pages to the segment driver. Returns -1 if a large 19120Sstevel@tonic-gate * page cannot be allocated and -2 if some other process has allocated a 19130Sstevel@tonic-gate * larger page. 19140Sstevel@tonic-gate * 19155331Samw * For cowfault it will allocate any size pages to fill the requested area to 19165331Samw * avoid partially overwriting anon slots (i.e. sharing only some of the anon 19170Sstevel@tonic-gate * slots within a large page with other processes). This policy greatly 19180Sstevel@tonic-gate * simplifies the freeing of large pages (a large page is freed only when all 19190Sstevel@tonic-gate * of its anon slot refcnts are 0).
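 *
 * A sketch of how a segment driver might consume the return values
 * (illustrative only; the local variables are hypothetical):
 *
 *	ierr = anon_map_getpages(amp, a_idx, szc, seg, addr, prot,
 *	    &vpprot, ppa, &pszc, vpage, rw, brkcow, anypgsz, 0, cred);
 *	if (ierr == -1)
 *		retry with a smaller szc;
 *	else if (ierr == -2)
 *		retry with the larger page size reported in pszc;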
19200Sstevel@tonic-gate */ 19210Sstevel@tonic-gate int 19220Sstevel@tonic-gate anon_map_getpages( 19230Sstevel@tonic-gate struct anon_map *amp, 19240Sstevel@tonic-gate ulong_t start_idx, 19250Sstevel@tonic-gate uint_t szc, 19260Sstevel@tonic-gate struct seg *seg, 19270Sstevel@tonic-gate caddr_t addr, 19280Sstevel@tonic-gate uint_t prot, 19290Sstevel@tonic-gate uint_t *protp, 19300Sstevel@tonic-gate page_t *ppa[], 19310Sstevel@tonic-gate uint_t *ppa_szc, 19320Sstevel@tonic-gate struct vpage vpage[], 19330Sstevel@tonic-gate enum seg_rw rw, 19340Sstevel@tonic-gate int brkcow, 19350Sstevel@tonic-gate int anypgsz, 19364426Saguzovsk int pgflags, 19370Sstevel@tonic-gate struct cred *cred) 19380Sstevel@tonic-gate { 19390Sstevel@tonic-gate pgcnt_t pgcnt; 19400Sstevel@tonic-gate struct anon *ap; 19410Sstevel@tonic-gate struct vnode *vp; 19420Sstevel@tonic-gate anoff_t off; 19430Sstevel@tonic-gate page_t *pp, *pl[2], *conpp = NULL; 19440Sstevel@tonic-gate caddr_t vaddr; 19450Sstevel@tonic-gate ulong_t pg_idx, an_idx, i; 19460Sstevel@tonic-gate spgcnt_t nreloc = 0; 19470Sstevel@tonic-gate int prealloc = 1; 19480Sstevel@tonic-gate int err, slotcreate; 19490Sstevel@tonic-gate uint_t vpprot; 19502414Saguzovsk int upsize = (szc < seg->s_szc); 19510Sstevel@tonic-gate 19520Sstevel@tonic-gate #if !defined(__i386) && !defined(__amd64) 19530Sstevel@tonic-gate ASSERT(seg->s_szc != 0); 19540Sstevel@tonic-gate #endif 19550Sstevel@tonic-gate ASSERT(szc <= seg->s_szc); 19560Sstevel@tonic-gate ASSERT(ppa_szc != NULL); 19570Sstevel@tonic-gate ASSERT(rw != S_CREATE); 19580Sstevel@tonic-gate 19590Sstevel@tonic-gate *protp = PROT_ALL; 19600Sstevel@tonic-gate 19610Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[0]); 19620Sstevel@tonic-gate 19630Sstevel@tonic-gate if (szc == 0) { 19640Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[1]); 19650Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, start_idx)) != NULL) { 19660Sstevel@tonic-gate err = anon_getpage(&ap, protp, pl, PAGESIZE, seg, 19670Sstevel@tonic-gate addr, rw, cred); 19680Sstevel@tonic-gate if (err) 19690Sstevel@tonic-gate return (err); 19700Sstevel@tonic-gate ppa[0] = pl[0]; 19710Sstevel@tonic-gate if (brkcow == 0 || (*protp & PROT_WRITE)) { 19720Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[2]); 19732414Saguzovsk if (ppa[0]->p_szc != 0 && upsize) { 19740Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[3]); 19752414Saguzovsk *ppa_szc = MIN(ppa[0]->p_szc, 19762414Saguzovsk seg->s_szc); 19770Sstevel@tonic-gate page_unlock(ppa[0]); 19780Sstevel@tonic-gate return (-2); 19790Sstevel@tonic-gate } 19800Sstevel@tonic-gate return (0); 19810Sstevel@tonic-gate } 19820Sstevel@tonic-gate panic("anon_map_getpages: cowfault for szc 0"); 19830Sstevel@tonic-gate } else { 19840Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[4]); 19850Sstevel@tonic-gate ppa[0] = anon_zero(seg, addr, &ap, cred); 19860Sstevel@tonic-gate if (ppa[0] == NULL) 19870Sstevel@tonic-gate return (ENOMEM); 19880Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, start_idx, ap, 19890Sstevel@tonic-gate ANON_SLEEP); 19900Sstevel@tonic-gate return (0); 19910Sstevel@tonic-gate } 19920Sstevel@tonic-gate } 19930Sstevel@tonic-gate 19940Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc); 19950Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 19960Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(start_idx, pgcnt)); 19970Sstevel@tonic-gate 19980Sstevel@tonic-gate /* 19990Sstevel@tonic-gate * First we check for the case that the requested large 20000Sstevel@tonic-gate * page or larger page already exists in
the system. 20010Sstevel@tonic-gate * Actually we only check if the first constituent page 20020Sstevel@tonic-gate * exists and only preallocate if it's not found. 20030Sstevel@tonic-gate */ 20040Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, start_idx); 20050Sstevel@tonic-gate if (ap) { 20060Sstevel@tonic-gate uint_t pszc; 20070Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 20080Sstevel@tonic-gate if (page_exists_forreal(vp, (u_offset_t)off, &pszc)) { 20092414Saguzovsk if (pszc > szc && upsize) { 20102414Saguzovsk *ppa_szc = MIN(pszc, seg->s_szc); 20110Sstevel@tonic-gate return (-2); 20120Sstevel@tonic-gate } 20132414Saguzovsk if (pszc >= szc) { 20140Sstevel@tonic-gate prealloc = 0; 20150Sstevel@tonic-gate } 20160Sstevel@tonic-gate } 20170Sstevel@tonic-gate } 20180Sstevel@tonic-gate 20190Sstevel@tonic-gate VM_STAT_COND_ADD(prealloc == 0, anonvmstats.getpages[5]); 20200Sstevel@tonic-gate VM_STAT_COND_ADD(prealloc != 0, anonvmstats.getpages[6]); 20210Sstevel@tonic-gate 20220Sstevel@tonic-gate top: 20230Sstevel@tonic-gate /* 20240Sstevel@tonic-gate * If a smaller page or no page at all was found, 20250Sstevel@tonic-gate * grab a large page off the freelist. 20260Sstevel@tonic-gate */ 20270Sstevel@tonic-gate if (prealloc) { 20280Sstevel@tonic-gate ASSERT(conpp == NULL); 2029749Ssusans if (page_alloc_pages(anon_vp, seg, addr, NULL, ppa, 20304426Saguzovsk szc, 0, pgflags) != 0) { 20310Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[7]); 20325466Skchow if (brkcow == 0 || szc < seg->s_szc || 20335466Skchow !anon_szcshare(amp->ahp, start_idx)) { 20340Sstevel@tonic-gate /* 20350Sstevel@tonic-gate * If the refcnts of all anon slots are <= 1 20360Sstevel@tonic-gate * they can't increase since we are holding 20370Sstevel@tonic-gate * the address space's lock. So segvn can 20380Sstevel@tonic-gate * safely decrease szc without risking a 20390Sstevel@tonic-gate * cow fault for a region smaller 20400Sstevel@tonic-gate * than the segment's largest page size. 20410Sstevel@tonic-gate */ 20420Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[8]); 20430Sstevel@tonic-gate return (-1); 20440Sstevel@tonic-gate } 20450Sstevel@tonic-gate docow: 20460Sstevel@tonic-gate /* 20470Sstevel@tonic-gate * This is a cow fault. Copy away the entire large 20480Sstevel@tonic-gate * page region of this segment.
20490Sstevel@tonic-gate */ 20500Sstevel@tonic-gate if (szc != seg->s_szc) 20510Sstevel@tonic-gate panic("anon_map_getpages: cowfault for szc %d", 20520Sstevel@tonic-gate szc); 20530Sstevel@tonic-gate vaddr = addr; 20540Sstevel@tonic-gate for (pg_idx = 0, an_idx = start_idx; pg_idx < pgcnt; 20550Sstevel@tonic-gate pg_idx++, an_idx++, vaddr += PAGESIZE) { 20560Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, an_idx)) != 20570Sstevel@tonic-gate NULL) { 20580Sstevel@tonic-gate err = anon_getpage(&ap, &vpprot, pl, 20590Sstevel@tonic-gate PAGESIZE, seg, vaddr, rw, cred); 20600Sstevel@tonic-gate if (err) { 20610Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) { 20620Sstevel@tonic-gate if ((pp = ppa[i]) != 20630Sstevel@tonic-gate NULL) 20640Sstevel@tonic-gate page_unlock(pp); 20650Sstevel@tonic-gate } 20660Sstevel@tonic-gate return (err); 20670Sstevel@tonic-gate } 20680Sstevel@tonic-gate ppa[pg_idx] = pl[0]; 20690Sstevel@tonic-gate } else { 20700Sstevel@tonic-gate /* 20710Sstevel@tonic-gate * Since this is a cowfault we know 20720Sstevel@tonic-gate * that this address space has a 20730Sstevel@tonic-gate * parent or children, which means 20740Sstevel@tonic-gate * anon_dup_fill_holes() has initialized 20750Sstevel@tonic-gate * all anon slots within a large page 20760Sstevel@tonic-gate * region that had at least one anon 20770Sstevel@tonic-gate * slot at the time of fork(). 20780Sstevel@tonic-gate */ 20790Sstevel@tonic-gate panic("anon_map_getpages: " 20800Sstevel@tonic-gate "cowfault but anon slot is empty"); 20810Sstevel@tonic-gate } 20820Sstevel@tonic-gate } 20830Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[9]); 20840Sstevel@tonic-gate *protp = PROT_ALL; 20850Sstevel@tonic-gate return (anon_map_privatepages(amp, start_idx, szc, seg, 20864426Saguzovsk addr, prot, ppa, vpage, anypgsz, pgflags, cred)); 20870Sstevel@tonic-gate } 20880Sstevel@tonic-gate } 20890Sstevel@tonic-gate 20900Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[10]); 20910Sstevel@tonic-gate 20920Sstevel@tonic-gate an_idx = start_idx; 20930Sstevel@tonic-gate pg_idx = 0; 20940Sstevel@tonic-gate vaddr = addr; 20950Sstevel@tonic-gate while (pg_idx < pgcnt) { 20960Sstevel@tonic-gate slotcreate = 0; 20970Sstevel@tonic-gate if ((ap = anon_get_ptr(amp->ahp, an_idx)) == NULL) { 20980Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[11]); 20990Sstevel@tonic-gate /* 21000Sstevel@tonic-gate * For us to have decided not to preallocate 21010Sstevel@tonic-gate * would have meant that a large page 21020Sstevel@tonic-gate * was found, which also means that all of the 21030Sstevel@tonic-gate * anon slots for that page would have already 21040Sstevel@tonic-gate * been created for us. 21050Sstevel@tonic-gate */ 21060Sstevel@tonic-gate if (prealloc == 0) 21070Sstevel@tonic-gate panic("anon_map_getpages: prealloc = 0"); 21080Sstevel@tonic-gate 21090Sstevel@tonic-gate slotcreate = 1; 21100Sstevel@tonic-gate ap = anon_alloc(NULL, 0); 21110Sstevel@tonic-gate } 21120Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 21130Sstevel@tonic-gate 21140Sstevel@tonic-gate /* 21150Sstevel@tonic-gate * Now set up our preallocated page to pass down 21160Sstevel@tonic-gate * to swap_getpage().
21170Sstevel@tonic-gate */ 21180Sstevel@tonic-gate if (prealloc) { 21190Sstevel@tonic-gate ASSERT(ppa[pg_idx]->p_szc == szc); 21200Sstevel@tonic-gate conpp = ppa[pg_idx]; 21210Sstevel@tonic-gate } 21220Sstevel@tonic-gate ASSERT(prealloc || conpp == NULL); 21230Sstevel@tonic-gate 21240Sstevel@tonic-gate /* 21250Sstevel@tonic-gate * If we just created this anon slot then call 21260Sstevel@tonic-gate * with S_CREATE to prevent doing IO on the page. 21270Sstevel@tonic-gate * Similar to the anon_zero case. 21280Sstevel@tonic-gate */ 21290Sstevel@tonic-gate err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE, 21302414Saguzovsk NULL, pl, PAGESIZE, conpp, ppa_szc, &nreloc, seg, vaddr, 21310Sstevel@tonic-gate slotcreate == 1 ? S_CREATE : rw, cred); 21320Sstevel@tonic-gate 21330Sstevel@tonic-gate if (err) { 21342414Saguzovsk ASSERT(err != -2 || upsize); 21350Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[12]); 21360Sstevel@tonic-gate ASSERT(slotcreate == 0); 21370Sstevel@tonic-gate goto io_err; 21380Sstevel@tonic-gate } 21390Sstevel@tonic-gate 21400Sstevel@tonic-gate pp = pl[0]; 21410Sstevel@tonic-gate 21422414Saguzovsk if (pp->p_szc < szc || (pp->p_szc > szc && upsize)) { 21430Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[13]); 21440Sstevel@tonic-gate ASSERT(slotcreate == 0); 21450Sstevel@tonic-gate ASSERT(prealloc == 0); 21460Sstevel@tonic-gate ASSERT(pg_idx == 0); 21470Sstevel@tonic-gate if (pp->p_szc > szc) { 21482414Saguzovsk ASSERT(upsize); 21492414Saguzovsk *ppa_szc = MIN(pp->p_szc, seg->s_szc); 21500Sstevel@tonic-gate page_unlock(pp); 21510Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[14]); 21520Sstevel@tonic-gate return (-2); 21530Sstevel@tonic-gate } 21540Sstevel@tonic-gate page_unlock(pp); 21550Sstevel@tonic-gate prealloc = 1; 21560Sstevel@tonic-gate goto top; 21570Sstevel@tonic-gate } 21580Sstevel@tonic-gate 21590Sstevel@tonic-gate /* 21600Sstevel@tonic-gate * If we decided to preallocate but VOP_GETPAGE 21610Sstevel@tonic-gate * found a page in the system that satisfies our 21620Sstevel@tonic-gate * request then free up our preallocated large page 21630Sstevel@tonic-gate * and continue looping across the existing large 21640Sstevel@tonic-gate * page via VOP_GETPAGE. 21650Sstevel@tonic-gate */ 21660Sstevel@tonic-gate if (prealloc && pp != ppa[pg_idx]) { 21670Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[15]); 21680Sstevel@tonic-gate ASSERT(slotcreate == 0); 21690Sstevel@tonic-gate ASSERT(pg_idx == 0); 21700Sstevel@tonic-gate conpp = NULL; 21710Sstevel@tonic-gate prealloc = 0; 21720Sstevel@tonic-gate page_free_pages(ppa[0]); 21730Sstevel@tonic-gate } 21740Sstevel@tonic-gate 21750Sstevel@tonic-gate if (prealloc && nreloc > 1) { 21760Sstevel@tonic-gate /* 21770Sstevel@tonic-gate * we have relocated out of a smaller large page. 21780Sstevel@tonic-gate * skip npgs - 1 iterations and continue, which will 21790Sstevel@tonic-gate * increment the loop indices by one.
21800Sstevel@tonic-gate */ 21810Sstevel@tonic-gate spgcnt_t npgs = nreloc; 21820Sstevel@tonic-gate 21830Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[16]); 21840Sstevel@tonic-gate 21850Sstevel@tonic-gate ASSERT(pp == ppa[pg_idx]); 21860Sstevel@tonic-gate ASSERT(slotcreate == 0); 21870Sstevel@tonic-gate ASSERT(pg_idx + npgs <= pgcnt); 21880Sstevel@tonic-gate if ((*protp & PROT_WRITE) && 21890Sstevel@tonic-gate anon_share(amp->ahp, an_idx, npgs)) { 21905466Skchow *protp &= ~PROT_WRITE; 21910Sstevel@tonic-gate } 21920Sstevel@tonic-gate pg_idx += npgs; 21930Sstevel@tonic-gate an_idx += npgs; 21940Sstevel@tonic-gate vaddr += PAGESIZE * npgs; 21950Sstevel@tonic-gate continue; 21960Sstevel@tonic-gate } 21970Sstevel@tonic-gate 21980Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[17]); 21990Sstevel@tonic-gate 22000Sstevel@tonic-gate /* 22010Sstevel@tonic-gate * Anon_zero case. 22020Sstevel@tonic-gate */ 22030Sstevel@tonic-gate if (slotcreate) { 22040Sstevel@tonic-gate ASSERT(prealloc); 22050Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); 22060Sstevel@tonic-gate CPU_STATS_ADD_K(vm, zfod, 1); 22070Sstevel@tonic-gate hat_setrefmod(pp); 22080Sstevel@tonic-gate } 22090Sstevel@tonic-gate 22100Sstevel@tonic-gate ASSERT(prealloc == 0 || ppa[pg_idx] == pp); 22110Sstevel@tonic-gate ASSERT(prealloc != 0 || PAGE_SHARED(pp)); 22120Sstevel@tonic-gate ASSERT(prealloc == 0 || PAGE_EXCL(pp)); 22130Sstevel@tonic-gate 22140Sstevel@tonic-gate if (pg_idx > 0 && 22150Sstevel@tonic-gate ((page_pptonum(pp) != page_pptonum(ppa[pg_idx - 1]) + 1) || 22162414Saguzovsk (pp->p_szc != ppa[pg_idx - 1]->p_szc))) { 22170Sstevel@tonic-gate panic("anon_map_getpages: unexpected page"); 22182414Saguzovsk } else if (pg_idx == 0 && (page_pptonum(pp) & (pgcnt - 1))) { 22192414Saguzovsk panic("anon_map_getpages: unaligned page"); 22202414Saguzovsk } 22210Sstevel@tonic-gate 22220Sstevel@tonic-gate if (prealloc == 0) { 22230Sstevel@tonic-gate ppa[pg_idx] = pp; 22240Sstevel@tonic-gate } 22250Sstevel@tonic-gate 22260Sstevel@tonic-gate if (ap->an_refcnt > 1) { 22270Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[18]); 22280Sstevel@tonic-gate *protp &= ~PROT_WRITE; 22290Sstevel@tonic-gate } 22300Sstevel@tonic-gate 22310Sstevel@tonic-gate /* 22320Sstevel@tonic-gate * If this is a new anon slot then initialize 22330Sstevel@tonic-gate * the anon array entry. 22340Sstevel@tonic-gate */ 22350Sstevel@tonic-gate if (slotcreate) { 22360Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP); 22370Sstevel@tonic-gate } 22380Sstevel@tonic-gate pg_idx++; 22390Sstevel@tonic-gate an_idx++; 22400Sstevel@tonic-gate vaddr += PAGESIZE; 22410Sstevel@tonic-gate } 22420Sstevel@tonic-gate 22430Sstevel@tonic-gate /* 22440Sstevel@tonic-gate * Since preallocated pages come off the freelist 22450Sstevel@tonic-gate * they are locked SE_EXCL. Simply downgrade and return. 
22460Sstevel@tonic-gate */ 22470Sstevel@tonic-gate if (prealloc) { 22480Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[19]); 22490Sstevel@tonic-gate conpp = NULL; 22500Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) { 22510Sstevel@tonic-gate page_downgrade(ppa[pg_idx]); 22520Sstevel@tonic-gate } 22530Sstevel@tonic-gate } 22540Sstevel@tonic-gate ASSERT(conpp == NULL); 22550Sstevel@tonic-gate 22560Sstevel@tonic-gate if (brkcow == 0 || (*protp & PROT_WRITE)) { 22570Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[20]); 22580Sstevel@tonic-gate return (0); 22590Sstevel@tonic-gate } 22600Sstevel@tonic-gate 22610Sstevel@tonic-gate if (szc < seg->s_szc) 22620Sstevel@tonic-gate panic("anon_map_getpages: cowfault for szc %d", szc); 22630Sstevel@tonic-gate 22640Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[21]); 22650Sstevel@tonic-gate 22660Sstevel@tonic-gate *protp = PROT_ALL; 22670Sstevel@tonic-gate return (anon_map_privatepages(amp, start_idx, szc, seg, addr, prot, 22684426Saguzovsk ppa, vpage, anypgsz, pgflags, cred)); 22690Sstevel@tonic-gate io_err: 22700Sstevel@tonic-gate /* 22710Sstevel@tonic-gate * We got an IO error somewhere in our large page. 22720Sstevel@tonic-gate * If we were using a preallocated page then just demote 22730Sstevel@tonic-gate * all the constituent pages that we've succeeded with so far 22740Sstevel@tonic-gate * to PAGESIZE pages and leave them in the system 22750Sstevel@tonic-gate * unlocked. 22760Sstevel@tonic-gate */ 22770Sstevel@tonic-gate 22782414Saguzovsk ASSERT(err != -2 || ((pg_idx == 0) && upsize)); 22790Sstevel@tonic-gate 22800Sstevel@tonic-gate VM_STAT_COND_ADD(err > 0, anonvmstats.getpages[22]); 22810Sstevel@tonic-gate VM_STAT_COND_ADD(err == -1, anonvmstats.getpages[23]); 22820Sstevel@tonic-gate VM_STAT_COND_ADD(err == -2, anonvmstats.getpages[24]); 22830Sstevel@tonic-gate 22840Sstevel@tonic-gate if (prealloc) { 22850Sstevel@tonic-gate conpp = NULL; 22860Sstevel@tonic-gate if (pg_idx > 0) { 22870Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[25]); 22880Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 22890Sstevel@tonic-gate pp = ppa[i]; 22900Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 22910Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 22920Sstevel@tonic-gate pp->p_szc = 0; 22930Sstevel@tonic-gate } 22940Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) { 22950Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(ppa[i])); 22960Sstevel@tonic-gate page_unlock(ppa[i]); 22970Sstevel@tonic-gate } 22980Sstevel@tonic-gate /* 22990Sstevel@tonic-gate * Now free up the remaining unused constituent 23000Sstevel@tonic-gate * pages. 23010Sstevel@tonic-gate */ 23020Sstevel@tonic-gate while (pg_idx < pgcnt) { 23030Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(ppa[pg_idx])); 23040Sstevel@tonic-gate page_free(ppa[pg_idx], 0); 23050Sstevel@tonic-gate pg_idx++; 23060Sstevel@tonic-gate } 23070Sstevel@tonic-gate } else { 23080Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[26]); 23090Sstevel@tonic-gate page_free_pages(ppa[0]); 23100Sstevel@tonic-gate } 23110Sstevel@tonic-gate } else { 23120Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[27]); 23130Sstevel@tonic-gate ASSERT(err > 0); 23140Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) 23150Sstevel@tonic-gate page_unlock(ppa[i]); 23160Sstevel@tonic-gate } 23170Sstevel@tonic-gate ASSERT(conpp == NULL); 23180Sstevel@tonic-gate if (err != -1) 23190Sstevel@tonic-gate return (err); 23200Sstevel@tonic-gate /* 23210Sstevel@tonic-gate * we are here because we failed to relocate.
23220Sstevel@tonic-gate */ 23230Sstevel@tonic-gate ASSERT(prealloc); 23245466Skchow if (brkcow == 0 || szc < seg->s_szc || 23255466Skchow !anon_szcshare(amp->ahp, start_idx)) { 23260Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[28]); 23270Sstevel@tonic-gate return (-1); 23280Sstevel@tonic-gate } 23290Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.getpages[29]); 23300Sstevel@tonic-gate goto docow; 23310Sstevel@tonic-gate } 23320Sstevel@tonic-gate 23330Sstevel@tonic-gate 23340Sstevel@tonic-gate /* 23350Sstevel@tonic-gate * Turn a reference to an object or shared anon page 23360Sstevel@tonic-gate * into a private page with a copy of the data from the 23370Sstevel@tonic-gate * original page, which is always locked by the caller. 23380Sstevel@tonic-gate * This routine unloads the translation and unlocks the 23390Sstevel@tonic-gate * original page, if it isn't being stolen, before returning 23400Sstevel@tonic-gate * to the caller. 23410Sstevel@tonic-gate * 23420Sstevel@tonic-gate * NOTE: The original anon slot is not freed by this routine. 23430Sstevel@tonic-gate * It must be freed by the caller while holding the 23440Sstevel@tonic-gate * "anon_map" lock to prevent races which can occur if 23450Sstevel@tonic-gate * a process has multiple lwps in its address space. 23460Sstevel@tonic-gate */ 23470Sstevel@tonic-gate page_t * 23480Sstevel@tonic-gate anon_private( 23490Sstevel@tonic-gate struct anon **app, 23500Sstevel@tonic-gate struct seg *seg, 23510Sstevel@tonic-gate caddr_t addr, 23520Sstevel@tonic-gate uint_t prot, 23530Sstevel@tonic-gate page_t *opp, 23540Sstevel@tonic-gate int oppflags, 23550Sstevel@tonic-gate struct cred *cred) 23560Sstevel@tonic-gate { 23570Sstevel@tonic-gate struct anon *old = *app; 23580Sstevel@tonic-gate struct anon *new; 23590Sstevel@tonic-gate page_t *pp = NULL; 23600Sstevel@tonic-gate struct vnode *vp; 23610Sstevel@tonic-gate anoff_t off; 23620Sstevel@tonic-gate page_t *anon_pl[1 + 1]; 23630Sstevel@tonic-gate int err; 23640Sstevel@tonic-gate 23650Sstevel@tonic-gate if (oppflags & STEAL_PAGE) 23660Sstevel@tonic-gate ASSERT(PAGE_EXCL(opp)); 23670Sstevel@tonic-gate else 23680Sstevel@tonic-gate ASSERT(PAGE_LOCKED(opp)); 23690Sstevel@tonic-gate 23700Sstevel@tonic-gate CPU_STATS_ADD_K(vm, cow_fault, 1); 23710Sstevel@tonic-gate 23720Sstevel@tonic-gate /* Kernel probe */ 23730Sstevel@tonic-gate TNF_PROBE_1(anon_private, "vm pagefault", /* CSTYLED */, 23740Sstevel@tonic-gate tnf_opaque, address, addr); 23750Sstevel@tonic-gate 23760Sstevel@tonic-gate *app = new = anon_alloc(NULL, 0); 23770Sstevel@tonic-gate swap_xlate(new, &vp, &off); 23780Sstevel@tonic-gate 23790Sstevel@tonic-gate if (oppflags & STEAL_PAGE) { 23800Sstevel@tonic-gate page_rename(opp, vp, (u_offset_t)off); 23810Sstevel@tonic-gate pp = opp; 23820Sstevel@tonic-gate TRACE_5(TR_FAC_VM, TR_ANON_PRIVATE, 23835466Skchow "anon_private:seg %p addr %x pp %p vp %p off %lx", 23845466Skchow seg, addr, pp, vp, off); 23850Sstevel@tonic-gate hat_setmod(pp); 23860Sstevel@tonic-gate 23870Sstevel@tonic-gate /* bug 4026339 */ 23880Sstevel@tonic-gate page_downgrade(pp); 23890Sstevel@tonic-gate return (pp); 23900Sstevel@tonic-gate } 23910Sstevel@tonic-gate 23920Sstevel@tonic-gate /* 23930Sstevel@tonic-gate * Call the VOP_GETPAGE routine to create the page, thereby 23940Sstevel@tonic-gate * enabling the vnode driver to allocate any filesystem 23950Sstevel@tonic-gate * space (e.g., disk block allocation for UFS).
This also 23960Sstevel@tonic-gate * prevents more than one page from being added to the 23970Sstevel@tonic-gate * vnode at the same time. 23980Sstevel@tonic-gate */ 23990Sstevel@tonic-gate err = VOP_GETPAGE(vp, (u_offset_t)off, PAGESIZE, NULL, 24005331Samw anon_pl, PAGESIZE, seg, addr, S_CREATE, cred, NULL); 24010Sstevel@tonic-gate if (err) 24020Sstevel@tonic-gate goto out; 24030Sstevel@tonic-gate 24040Sstevel@tonic-gate pp = anon_pl[0]; 24050Sstevel@tonic-gate 24060Sstevel@tonic-gate /* 24070Sstevel@tonic-gate * If the original page was locked, we need to move the lock 24080Sstevel@tonic-gate * to the new page by transferring 'cowcnt/lckcnt' of the original 24090Sstevel@tonic-gate * page to 'cowcnt/lckcnt' of the new page. 24100Sstevel@tonic-gate * 24110Sstevel@tonic-gate * See Statement at the beginning of segvn_lockop() and 24120Sstevel@tonic-gate * comments in page_pp_useclaim() regarding the way 24130Sstevel@tonic-gate * cowcnts/lckcnts are handled. 24140Sstevel@tonic-gate * 24150Sstevel@tonic-gate * Also availrmem must be decremented up front for read only mapping 24160Sstevel@tonic-gate * before calling page_pp_useclaim. page_pp_useclaim will bump it back 24170Sstevel@tonic-gate * if availrmem did not need to be decremented after all. 24180Sstevel@tonic-gate */ 24190Sstevel@tonic-gate if (oppflags & LOCK_PAGE) { 24200Sstevel@tonic-gate if ((prot & PROT_WRITE) == 0) { 24210Sstevel@tonic-gate mutex_enter(&freemem_lock); 24220Sstevel@tonic-gate if (availrmem > pages_pp_maximum) { 24230Sstevel@tonic-gate availrmem--; 24240Sstevel@tonic-gate pages_useclaim++; 24250Sstevel@tonic-gate } else { 24260Sstevel@tonic-gate mutex_exit(&freemem_lock); 24270Sstevel@tonic-gate goto out; 24280Sstevel@tonic-gate } 24290Sstevel@tonic-gate mutex_exit(&freemem_lock); 24300Sstevel@tonic-gate } 24310Sstevel@tonic-gate page_pp_useclaim(opp, pp, prot & PROT_WRITE); 24320Sstevel@tonic-gate } 24330Sstevel@tonic-gate 24340Sstevel@tonic-gate /* 24350Sstevel@tonic-gate * Now copy the contents from the original page, 24360Sstevel@tonic-gate * which is locked and loaded in the MMU by 24370Sstevel@tonic-gate * the caller to prevent yet another page fault. 24380Sstevel@tonic-gate */ 24393253Smec /* XXX - should set mod bit in here */ 24403253Smec if (ppcopy(opp, pp) == 0) { 24413253Smec /* 24423253Smec * Before ppcopy could handle UE or other faults, we 24433253Smec * would have panicked here, and still have no option 24443253Smec * but to do so now. 24453253Smec */ 24463253Smec panic("anon_private, ppcopy failed, opp = 0x%p, pp = 0x%p", 24477632SNick.Todd@Sun.COM (void *)opp, (void *)pp); 24483253Smec } 24490Sstevel@tonic-gate 24500Sstevel@tonic-gate hat_setrefmod(pp); /* mark as modified */ 24510Sstevel@tonic-gate 24520Sstevel@tonic-gate /* 24530Sstevel@tonic-gate * Unload the old translation. 24540Sstevel@tonic-gate */ 24550Sstevel@tonic-gate hat_unload(seg->s_as->a_hat, addr, PAGESIZE, HAT_UNLOAD); 24560Sstevel@tonic-gate 24570Sstevel@tonic-gate /* 24580Sstevel@tonic-gate * Free the unmapped, unmodified original page, 24590Sstevel@tonic-gate * or release the lock on the original page; 24600Sstevel@tonic-gate * otherwise the process will sleep forever in 24610Sstevel@tonic-gate * anon_decref() waiting for the "exclusive" lock 24620Sstevel@tonic-gate * on the page.
24630Sstevel@tonic-gate */ 24640Sstevel@tonic-gate (void) page_release(opp, 1); 24650Sstevel@tonic-gate 24660Sstevel@tonic-gate /* 24670Sstevel@tonic-gate * we are done with page creation so downgrade the new 24680Sstevel@tonic-gate * page's selock to shared; this helps when multiple 24690Sstevel@tonic-gate * as_fault(...SOFTLOCK...) are done to the same 24700Sstevel@tonic-gate * page(aio) 24710Sstevel@tonic-gate */ 24720Sstevel@tonic-gate page_downgrade(pp); 24730Sstevel@tonic-gate 24740Sstevel@tonic-gate /* 24750Sstevel@tonic-gate * NOTE: The original anon slot must be freed by the 24760Sstevel@tonic-gate * caller while holding the "anon_map" lock, if we 24770Sstevel@tonic-gate * copied away from an anonymous page. 24780Sstevel@tonic-gate */ 24790Sstevel@tonic-gate return (pp); 24800Sstevel@tonic-gate 24810Sstevel@tonic-gate out: 24820Sstevel@tonic-gate *app = old; 24830Sstevel@tonic-gate if (pp) 24840Sstevel@tonic-gate page_unlock(pp); 24850Sstevel@tonic-gate anon_decref(new); 24860Sstevel@tonic-gate page_unlock(opp); 24870Sstevel@tonic-gate return ((page_t *)NULL); 24880Sstevel@tonic-gate } 24890Sstevel@tonic-gate 24900Sstevel@tonic-gate int 24910Sstevel@tonic-gate anon_map_privatepages( 24920Sstevel@tonic-gate struct anon_map *amp, 24930Sstevel@tonic-gate ulong_t start_idx, 24940Sstevel@tonic-gate uint_t szc, 24950Sstevel@tonic-gate struct seg *seg, 24960Sstevel@tonic-gate caddr_t addr, 24970Sstevel@tonic-gate uint_t prot, 24980Sstevel@tonic-gate page_t *ppa[], 24990Sstevel@tonic-gate struct vpage vpage[], 25000Sstevel@tonic-gate int anypgsz, 25014426Saguzovsk int pgflags, 25020Sstevel@tonic-gate struct cred *cred) 25030Sstevel@tonic-gate { 25040Sstevel@tonic-gate pgcnt_t pgcnt; 25050Sstevel@tonic-gate struct vnode *vp; 25060Sstevel@tonic-gate anoff_t off; 25070Sstevel@tonic-gate page_t *pl[2], *conpp = NULL; 25080Sstevel@tonic-gate int err; 25090Sstevel@tonic-gate int prealloc = 1; 25100Sstevel@tonic-gate struct anon *ap, *oldap; 25110Sstevel@tonic-gate caddr_t vaddr; 25120Sstevel@tonic-gate page_t *pplist, *pp; 25130Sstevel@tonic-gate ulong_t pg_idx, an_idx; 25140Sstevel@tonic-gate spgcnt_t nreloc = 0; 25150Sstevel@tonic-gate int pagelock = 0; 25160Sstevel@tonic-gate kmutex_t *ahmpages = NULL; 25170Sstevel@tonic-gate #ifdef DEBUG 25180Sstevel@tonic-gate int refcnt; 25190Sstevel@tonic-gate #endif 25200Sstevel@tonic-gate 25210Sstevel@tonic-gate ASSERT(szc != 0); 25220Sstevel@tonic-gate ASSERT(szc == seg->s_szc); 25230Sstevel@tonic-gate 25240Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[0]); 25250Sstevel@tonic-gate 25260Sstevel@tonic-gate pgcnt = page_get_pagecnt(szc); 25270Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 25280Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(start_idx, pgcnt)); 25290Sstevel@tonic-gate 25300Sstevel@tonic-gate ASSERT(amp != NULL); 25310Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, start_idx); 25320Sstevel@tonic-gate ASSERT(ap == NULL || ap->an_refcnt >= 1); 25330Sstevel@tonic-gate 25340Sstevel@tonic-gate VM_STAT_COND_ADD(ap == NULL, anonvmstats.privatepages[1]); 25350Sstevel@tonic-gate 25360Sstevel@tonic-gate /* 25370Sstevel@tonic-gate * Now try to allocate the large page. If we fail then just 25380Sstevel@tonic-gate * let VOP_GETPAGE give us PAGESIZE pages. Normally we let 25390Sstevel@tonic-gate * the caller make this decision but to avoid added complexity 25400Sstevel@tonic-gate * it's simpler to handle that case here.
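 *
 * For instance (a sketch only, with hypothetical caller state), a
 * caller that wants to skip large page preallocation entirely can pass
 * anypgsz == -1, which lets swap_getconpage() hand back PAGESIZE pages
 * one at a time:
 *
 *	(void) anon_map_privatepages(amp, start_idx, szc, seg, addr,
 *	    prot, ppa, vpage, -1, 0, cred);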
25410Sstevel@tonic-gate */ 25420Sstevel@tonic-gate if (anypgsz == -1) { 25430Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[2]); 25440Sstevel@tonic-gate prealloc = 0; 2545749Ssusans } else if (page_alloc_pages(anon_vp, seg, addr, &pplist, NULL, szc, 25464426Saguzovsk anypgsz, pgflags) != 0) { 25470Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[3]); 25480Sstevel@tonic-gate prealloc = 0; 25490Sstevel@tonic-gate } 25500Sstevel@tonic-gate 25510Sstevel@tonic-gate /* 25520Sstevel@tonic-gate * Make the decrement of all refcnts of all 25530Sstevel@tonic-gate * anon slots of a large page appear atomic by 25540Sstevel@tonic-gate * getting an anonpages_hash_lock for the 25550Sstevel@tonic-gate * first anon slot of a large page. 25560Sstevel@tonic-gate */ 25570Sstevel@tonic-gate if (ap != NULL) { 255812173SMichael.Corcoran@Sun.COM ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); 25590Sstevel@tonic-gate mutex_enter(ahmpages); 25600Sstevel@tonic-gate if (ap->an_refcnt == 1) { 25610Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[4]); 25620Sstevel@tonic-gate ASSERT(!anon_share(amp->ahp, start_idx, pgcnt)); 25630Sstevel@tonic-gate mutex_exit(ahmpages); 25640Sstevel@tonic-gate 25650Sstevel@tonic-gate if (prealloc) { 25660Sstevel@tonic-gate page_free_replacement_page(pplist); 25670Sstevel@tonic-gate page_create_putback(pgcnt); 25680Sstevel@tonic-gate } 25690Sstevel@tonic-gate ASSERT(ppa[0]->p_szc <= szc); 25700Sstevel@tonic-gate if (ppa[0]->p_szc == szc) { 25710Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[5]); 25720Sstevel@tonic-gate return (0); 25730Sstevel@tonic-gate } 25740Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) { 25750Sstevel@tonic-gate ASSERT(ppa[pg_idx] != NULL); 25760Sstevel@tonic-gate page_unlock(ppa[pg_idx]); 25770Sstevel@tonic-gate } 25780Sstevel@tonic-gate return (-1); 25790Sstevel@tonic-gate } 25800Sstevel@tonic-gate } 25810Sstevel@tonic-gate 25820Sstevel@tonic-gate /* 25830Sstevel@tonic-gate * If we are passed in the vpage array and this is 25840Sstevel@tonic-gate * not PROT_WRITE then we need to decrement availrmem 25850Sstevel@tonic-gate * up front before we try anything. If we need to and 25860Sstevel@tonic-gate * can't decrement availrmem then it's better to fail now 25870Sstevel@tonic-gate * than in the middle of processing the new large page. 25880Sstevel@tonic-gate * page_pp_useclaim() on behalf of each constituent page 25890Sstevel@tonic-gate * below will adjust availrmem back for the cases where it is not needed.
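 *
 * In sketch form, a successful up-front claim moves exactly pgcnt
 * pages from availrmem to pages_useclaim under freemem_lock:
 *
 *	availrmem -= pgcnt;
 *	pages_useclaim += pgcnt;
 *
 * and every constituent page that turns out not to be PP-locked
 * gives one page back (the availrmem++/pages_useclaim-- pair in the
 * copy loop further below), so the net claim ends up matching the
 * number of constituent pages that actually keep the lock.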
25900Sstevel@tonic-gate */ 25910Sstevel@tonic-gate if (vpage != NULL && (prot & PROT_WRITE) == 0) { 25920Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) { 25930Sstevel@tonic-gate if (VPP_ISPPLOCK(&vpage[pg_idx])) { 25940Sstevel@tonic-gate pagelock = 1; 25950Sstevel@tonic-gate break; 25960Sstevel@tonic-gate } 25970Sstevel@tonic-gate } 25980Sstevel@tonic-gate if (pagelock) { 25990Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[6]); 26000Sstevel@tonic-gate mutex_enter(&freemem_lock); 26010Sstevel@tonic-gate if (availrmem >= pages_pp_maximum + pgcnt) { 26020Sstevel@tonic-gate availrmem -= pgcnt; 26030Sstevel@tonic-gate pages_useclaim += pgcnt; 26040Sstevel@tonic-gate } else { 26050Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[7]); 26060Sstevel@tonic-gate mutex_exit(&freemem_lock); 26070Sstevel@tonic-gate if (ahmpages != NULL) { 26080Sstevel@tonic-gate mutex_exit(ahmpages); 26090Sstevel@tonic-gate } 26100Sstevel@tonic-gate if (prealloc) { 26110Sstevel@tonic-gate page_free_replacement_page(pplist); 26120Sstevel@tonic-gate page_create_putback(pgcnt); 26130Sstevel@tonic-gate } 26140Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) 26150Sstevel@tonic-gate if (ppa[pg_idx] != NULL) 26160Sstevel@tonic-gate page_unlock(ppa[pg_idx]); 26170Sstevel@tonic-gate return (ENOMEM); 26180Sstevel@tonic-gate } 26190Sstevel@tonic-gate mutex_exit(&freemem_lock); 26200Sstevel@tonic-gate } 26210Sstevel@tonic-gate } 26220Sstevel@tonic-gate 26230Sstevel@tonic-gate CPU_STATS_ADD_K(vm, cow_fault, pgcnt); 26240Sstevel@tonic-gate 26250Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[8]); 26260Sstevel@tonic-gate 26270Sstevel@tonic-gate an_idx = start_idx; 26280Sstevel@tonic-gate pg_idx = 0; 26290Sstevel@tonic-gate vaddr = addr; 26300Sstevel@tonic-gate for (; pg_idx < pgcnt; pg_idx++, an_idx++, vaddr += PAGESIZE) { 26310Sstevel@tonic-gate ASSERT(ppa[pg_idx] != NULL); 26320Sstevel@tonic-gate oldap = anon_get_ptr(amp->ahp, an_idx); 26330Sstevel@tonic-gate ASSERT(ahmpages != NULL || oldap == NULL); 26340Sstevel@tonic-gate ASSERT(ahmpages == NULL || oldap != NULL); 26350Sstevel@tonic-gate ASSERT(ahmpages == NULL || oldap->an_refcnt > 1); 26360Sstevel@tonic-gate ASSERT(ahmpages == NULL || pg_idx != 0 || 26370Sstevel@tonic-gate (refcnt = oldap->an_refcnt)); 26380Sstevel@tonic-gate ASSERT(ahmpages == NULL || pg_idx == 0 || 26390Sstevel@tonic-gate refcnt == oldap->an_refcnt); 26400Sstevel@tonic-gate 26410Sstevel@tonic-gate ap = anon_alloc(NULL, 0); 26420Sstevel@tonic-gate 26430Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 26440Sstevel@tonic-gate 26450Sstevel@tonic-gate /* 26460Sstevel@tonic-gate * Now set up our preallocated page to pass down to 26470Sstevel@tonic-gate * swap_getpage(). 26480Sstevel@tonic-gate */ 26490Sstevel@tonic-gate if (prealloc) { 26500Sstevel@tonic-gate pp = pplist; 26510Sstevel@tonic-gate page_sub(&pplist, pp); 26520Sstevel@tonic-gate conpp = pp; 26530Sstevel@tonic-gate } 26540Sstevel@tonic-gate 26550Sstevel@tonic-gate err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE, NULL, pl, 26565466Skchow PAGESIZE, conpp, NULL, &nreloc, seg, vaddr, 26575466Skchow S_CREATE, cred); 26580Sstevel@tonic-gate 26590Sstevel@tonic-gate /* 26600Sstevel@tonic-gate * Impossible to fail since this is S_CREATE. 26610Sstevel@tonic-gate */ 26620Sstevel@tonic-gate if (err) 26630Sstevel@tonic-gate panic("anon_map_privatepages: VOP_GETPAGE failed"); 26640Sstevel@tonic-gate 26650Sstevel@tonic-gate ASSERT(prealloc ?
pp == pl[0] : pl[0]->p_szc == 0); 26660Sstevel@tonic-gate ASSERT(prealloc == 0 || nreloc == 1); 26670Sstevel@tonic-gate 26680Sstevel@tonic-gate pp = pl[0]; 26690Sstevel@tonic-gate 26700Sstevel@tonic-gate /* 26710Sstevel@tonic-gate * If the original page was locked, we need to move 26720Sstevel@tonic-gate * the lock to the new page by transferring 26730Sstevel@tonic-gate * 'cowcnt/lckcnt' of the original page to 'cowcnt/lckcnt' 26740Sstevel@tonic-gate * of the new page. pg_idx can be used to index 26750Sstevel@tonic-gate * into the vpage array since the caller will guarantee 26760Sstevel@tonic-gate * that the vpage struct passed in corresponds to addr 26770Sstevel@tonic-gate * and forward. 26780Sstevel@tonic-gate */ 26790Sstevel@tonic-gate if (vpage != NULL && VPP_ISPPLOCK(&vpage[pg_idx])) { 26800Sstevel@tonic-gate page_pp_useclaim(ppa[pg_idx], pp, prot & PROT_WRITE); 26810Sstevel@tonic-gate } else if (pagelock) { 26820Sstevel@tonic-gate mutex_enter(&freemem_lock); 26830Sstevel@tonic-gate availrmem++; 26840Sstevel@tonic-gate pages_useclaim--; 26850Sstevel@tonic-gate mutex_exit(&freemem_lock); 26860Sstevel@tonic-gate } 26870Sstevel@tonic-gate 26880Sstevel@tonic-gate /* 26890Sstevel@tonic-gate * Now copy the contents from the original page. 26900Sstevel@tonic-gate */ 26913253Smec if (ppcopy(ppa[pg_idx], pp) == 0) { 26923253Smec /* 26933253Smec * Before ppcopy could handle UE or other faults, we 26943253Smec * would have panicked here, and still have no option 26953253Smec * but to do so now. 26963253Smec */ 26973253Smec panic("anon_map_privatepages, ppcopy failed"); 26983253Smec } 26990Sstevel@tonic-gate 27000Sstevel@tonic-gate hat_setrefmod(pp); /* mark as modified */ 27010Sstevel@tonic-gate 27020Sstevel@tonic-gate /* 27030Sstevel@tonic-gate * Release the lock on the original page, 27040Sstevel@tonic-gate * decrement the old slot, and downgrade the lock 27050Sstevel@tonic-gate * on the new copy. 27060Sstevel@tonic-gate */ 27070Sstevel@tonic-gate page_unlock(ppa[pg_idx]); 27080Sstevel@tonic-gate 27090Sstevel@tonic-gate if (!prealloc) 27100Sstevel@tonic-gate page_downgrade(pp); 27110Sstevel@tonic-gate 27120Sstevel@tonic-gate ppa[pg_idx] = pp; 27130Sstevel@tonic-gate 27140Sstevel@tonic-gate /* 27150Sstevel@tonic-gate * Now reflect the copy in the new anon array. 27160Sstevel@tonic-gate */ 27170Sstevel@tonic-gate ASSERT(ahmpages == NULL || oldap->an_refcnt > 1); 27180Sstevel@tonic-gate if (oldap != NULL) 27190Sstevel@tonic-gate anon_decref(oldap); 27200Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP); 27210Sstevel@tonic-gate } 27226285Speterte 27236285Speterte /* 27246285Speterte * Unload the old large page translation. 27256285Speterte */ 27266285Speterte hat_unload(seg->s_as->a_hat, addr, pgcnt << PAGESHIFT, HAT_UNLOAD); 27276285Speterte 27280Sstevel@tonic-gate if (ahmpages != NULL) { 27290Sstevel@tonic-gate mutex_exit(ahmpages); 27300Sstevel@tonic-gate } 27310Sstevel@tonic-gate ASSERT(prealloc == 0 || pplist == NULL); 27320Sstevel@tonic-gate if (prealloc) { 27330Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.privatepages[9]); 27340Sstevel@tonic-gate for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) { 27350Sstevel@tonic-gate page_downgrade(ppa[pg_idx]); 27360Sstevel@tonic-gate } 27370Sstevel@tonic-gate } 27380Sstevel@tonic-gate 27390Sstevel@tonic-gate return (0); 27400Sstevel@tonic-gate } 27410Sstevel@tonic-gate 27420Sstevel@tonic-gate /* 27430Sstevel@tonic-gate * Allocate a private zero-filled anon page.
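 *
 * Hypothetical usage sketch for a zero-fill-on-demand fault; the slot
 * bookkeeping and locking a real segment driver performs are elided:
 *
 *	struct anon *ap;
 *	page_t *pp;
 *
 *	pp = anon_zero(seg, addr, &ap, cred);
 *	if (pp == NULL)
 *		return (FC_MAKE_ERR(ENOMEM));	(hypothetical error path)
 *	(void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP);
 *
 * On success the page comes back held SE_SHARED, ready to be loaded
 * into the hat; on failure anon_zero() has already freed the anon
 * slot it allocated.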
27440Sstevel@tonic-gate */ 27450Sstevel@tonic-gate page_t * 27460Sstevel@tonic-gate anon_zero(struct seg *seg, caddr_t addr, struct anon **app, struct cred *cred) 27470Sstevel@tonic-gate { 27480Sstevel@tonic-gate struct anon *ap; 27490Sstevel@tonic-gate page_t *pp; 27500Sstevel@tonic-gate struct vnode *vp; 27510Sstevel@tonic-gate anoff_t off; 27520Sstevel@tonic-gate page_t *anon_pl[1 + 1]; 27530Sstevel@tonic-gate int err; 27540Sstevel@tonic-gate 27550Sstevel@tonic-gate /* Kernel probe */ 27560Sstevel@tonic-gate TNF_PROBE_1(anon_zero, "vm pagefault", /* CSTYLED */, 27570Sstevel@tonic-gate tnf_opaque, address, addr); 27580Sstevel@tonic-gate 27590Sstevel@tonic-gate *app = ap = anon_alloc(NULL, 0); 27600Sstevel@tonic-gate swap_xlate(ap, &vp, &off); 27610Sstevel@tonic-gate 27620Sstevel@tonic-gate /* 27630Sstevel@tonic-gate * Call the VOP_GETPAGE routine to create the page, thereby 27640Sstevel@tonic-gate * enabling the vnode driver to allocate any filesystem 27650Sstevel@tonic-gate * dependent structures (e.g., disk block allocation for UFS). 27660Sstevel@tonic-gate * This also prevents more than one page from being added to 27670Sstevel@tonic-gate * the vnode at the same time since it is locked. 27680Sstevel@tonic-gate */ 27690Sstevel@tonic-gate err = VOP_GETPAGE(vp, off, PAGESIZE, NULL, 27705331Samw anon_pl, PAGESIZE, seg, addr, S_CREATE, cred, NULL); 27710Sstevel@tonic-gate if (err) { 27720Sstevel@tonic-gate *app = NULL; 27730Sstevel@tonic-gate anon_decref(ap); 27740Sstevel@tonic-gate return (NULL); 27750Sstevel@tonic-gate } 27760Sstevel@tonic-gate pp = anon_pl[0]; 27770Sstevel@tonic-gate 27780Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); /* XXX - should set mod bit */ 27790Sstevel@tonic-gate page_downgrade(pp); 27800Sstevel@tonic-gate CPU_STATS_ADD_K(vm, zfod, 1); 27810Sstevel@tonic-gate hat_setrefmod(pp); /* mark as modified so pageout writes back */ 27820Sstevel@tonic-gate return (pp); 27830Sstevel@tonic-gate } 27840Sstevel@tonic-gate 27850Sstevel@tonic-gate 27860Sstevel@tonic-gate /* 27870Sstevel@tonic-gate * Allocate array of private zero-filled anon pages for empty slots 27880Sstevel@tonic-gate * and kept pages for non-empty slots within the given range. 27890Sstevel@tonic-gate * 27900Sstevel@tonic-gate * NOTE: This routine will try to use large pages 27910Sstevel@tonic-gate * if available and supported by underlying platform. 27920Sstevel@tonic-gate */ 27930Sstevel@tonic-gate int 27940Sstevel@tonic-gate anon_map_createpages( 27950Sstevel@tonic-gate struct anon_map *amp, 27960Sstevel@tonic-gate ulong_t start_index, 27970Sstevel@tonic-gate size_t len, 27980Sstevel@tonic-gate page_t *ppa[], 27990Sstevel@tonic-gate struct seg *seg, 28000Sstevel@tonic-gate caddr_t addr, 28010Sstevel@tonic-gate enum seg_rw rw, 28020Sstevel@tonic-gate struct cred *cred) 28030Sstevel@tonic-gate { 28040Sstevel@tonic-gate 28050Sstevel@tonic-gate struct anon *ap; 28060Sstevel@tonic-gate struct vnode *ap_vp; 28070Sstevel@tonic-gate page_t *pp, *pplist, *anon_pl[1 + 1], *conpp = NULL; 28080Sstevel@tonic-gate int err = 0; 28090Sstevel@tonic-gate ulong_t p_index, index; 28100Sstevel@tonic-gate pgcnt_t npgs, pg_cnt; 28110Sstevel@tonic-gate spgcnt_t nreloc = 0; 28120Sstevel@tonic-gate uint_t l_szc, szc, prot; 28130Sstevel@tonic-gate anoff_t ap_off; 28140Sstevel@tonic-gate size_t pgsz; 28150Sstevel@tonic-gate lgrp_t *lgrp; 28164270Ssusans kmutex_t *ahm; 28170Sstevel@tonic-gate 28180Sstevel@tonic-gate /* 28190Sstevel@tonic-gate * XXX For now only handle S_CREATE.
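 *
 * Illustrative call sketch (modeled loosely on ISM-style use; the
 * surrounding setup is hypothetical):
 *
 *	pgcnt_t npages = btopr(len);
 *	page_t **ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
 *
 *	err = anon_map_createpages(amp, anon_index, len, ppa, seg,
 *	    addr, S_CREATE, cred);
 *
 * On success every slot in [anon_index, anon_index + npages) is
 * populated and ppa[] holds the corresponding pages, each held
 * SE_SHARED.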
28200Sstevel@tonic-gate */ 28210Sstevel@tonic-gate ASSERT(rw == S_CREATE); 28220Sstevel@tonic-gate 28230Sstevel@tonic-gate index = start_index; 28240Sstevel@tonic-gate p_index = 0; 28250Sstevel@tonic-gate npgs = btopr(len); 28260Sstevel@tonic-gate 28270Sstevel@tonic-gate /* 28280Sstevel@tonic-gate * If this platform supports multiple page sizes 28290Sstevel@tonic-gate * then try to allocate directly from the free 28300Sstevel@tonic-gate * list for pages larger than PAGESIZE. 28310Sstevel@tonic-gate * 28320Sstevel@tonic-gate * NOTE: When we have page_create_ru we can stop 28330Sstevel@tonic-gate * directly allocating from the freelist. 28340Sstevel@tonic-gate */ 28350Sstevel@tonic-gate l_szc = seg->s_szc; 28360Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER); 28370Sstevel@tonic-gate while (npgs) { 28380Sstevel@tonic-gate 28390Sstevel@tonic-gate /* 28400Sstevel@tonic-gate * If the anon slot already exists 28410Sstevel@tonic-gate * (meaning the page has been created), then: 28420Sstevel@tonic-gate * 1) look up the page, 28430Sstevel@tonic-gate * 2) if the page is still in memory, get it, 28440Sstevel@tonic-gate * 3) if not, create a page and 28450Sstevel@tonic-gate * page it in from the physical swap device. 28460Sstevel@tonic-gate * These steps are done in anon_getpage(). 28470Sstevel@tonic-gate */ 28480Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, index); 28490Sstevel@tonic-gate if (ap) { 28500Sstevel@tonic-gate err = anon_getpage(&ap, &prot, anon_pl, PAGESIZE, 28510Sstevel@tonic-gate seg, addr, S_READ, cred); 28520Sstevel@tonic-gate if (err) { 28530Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 28540Sstevel@tonic-gate panic("anon_map_createpages: anon_getpage"); 28550Sstevel@tonic-gate } 28560Sstevel@tonic-gate pp = anon_pl[0]; 28570Sstevel@tonic-gate ppa[p_index++] = pp; 28580Sstevel@tonic-gate 28594270Ssusans /* 28604270Ssusans * an_pvp can become non-NULL after SysV's page was 28614270Ssusans * paged out before ISM was attached to this SysV 28624270Ssusans * shared memory segment. So free the swap slot if needed. 28634270Ssusans */ 28644270Ssusans if (ap->an_pvp != NULL) { 28654270Ssusans page_io_lock(pp); 286612173SMichael.Corcoran@Sun.COM ahm = AH_MUTEX(ap->an_vp, ap->an_off); 28674270Ssusans mutex_enter(ahm); 28684270Ssusans if (ap->an_pvp != NULL) { 28694270Ssusans swap_phys_free(ap->an_pvp, 28704270Ssusans ap->an_poff, PAGESIZE); 28714270Ssusans ap->an_pvp = NULL; 28724270Ssusans ap->an_poff = 0; 28734270Ssusans mutex_exit(ahm); 28744270Ssusans hat_setmod(pp); 28754270Ssusans } else { 28764270Ssusans mutex_exit(ahm); 28774270Ssusans } 28784270Ssusans page_io_unlock(pp); 28794270Ssusans } 28804270Ssusans 28810Sstevel@tonic-gate addr += PAGESIZE; 28820Sstevel@tonic-gate index++; 28830Sstevel@tonic-gate npgs--; 28840Sstevel@tonic-gate continue; 28850Sstevel@tonic-gate } 28860Sstevel@tonic-gate /* 28870Sstevel@tonic-gate * Now try to allocate the largest page possible 28880Sstevel@tonic-gate * for the current address and range. 28890Sstevel@tonic-gate * Keep dropping down in page size until the candidate: 28900Sstevel@tonic-gate * 28910Sstevel@tonic-gate * 1) is properly aligned, 28920Sstevel@tonic-gate * 2) does not overlap existing anon pages, 28930Sstevel@tonic-gate * 3) fits in the remaining range, and 28940Sstevel@tonic-gate * 4) can actually be allocated. 28950Sstevel@tonic-gate * 28960Sstevel@tonic-gate * NOTE: XXX When page_create_ru is completed this code 28970Sstevel@tonic-gate * will change.
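 *
 * Worked example of the checks below, assuming an 8K base page
 * (PAGESHIFT == 13) and a 4M candidate (pgsz == 0x400000):
 * IS_P2ALIGNED(addr, pgsz) tests (addr & 0x3fffff) == 0,
 * pg_cnt == 0x400000 >> 13 == 512 slots, and
 * anon_pages(amp->ahp, index, 512) == 0 verifies that none of those
 * 512 slots is already populated.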
28980Sstevel@tonic-gate */ 28990Sstevel@tonic-gate szc = l_szc; 29000Sstevel@tonic-gate pplist = NULL; 29010Sstevel@tonic-gate pg_cnt = 0; 29020Sstevel@tonic-gate while (szc) { 29030Sstevel@tonic-gate pgsz = page_get_pagesize(szc); 29040Sstevel@tonic-gate pg_cnt = pgsz >> PAGESHIFT; 29050Sstevel@tonic-gate if (IS_P2ALIGNED(addr, pgsz) && pg_cnt <= npgs && 29065466Skchow anon_pages(amp->ahp, index, pg_cnt) == 0) { 29070Sstevel@tonic-gate /* 29080Sstevel@tonic-gate * XXX 29090Sstevel@tonic-gate * Since we are faking page_create() 29100Sstevel@tonic-gate * we also need to do the freemem and 29110Sstevel@tonic-gate * pcf accounting. 29120Sstevel@tonic-gate */ 29130Sstevel@tonic-gate (void) page_create_wait(pg_cnt, PG_WAIT); 29140Sstevel@tonic-gate 29150Sstevel@tonic-gate /* 29160Sstevel@tonic-gate * Get lgroup to allocate next page of shared 29170Sstevel@tonic-gate * memory from and use it to specify where to 29180Sstevel@tonic-gate * allocate the physical memory 29190Sstevel@tonic-gate */ 29200Sstevel@tonic-gate lgrp = lgrp_mem_choose(seg, addr, pgsz); 29210Sstevel@tonic-gate 29220Sstevel@tonic-gate pplist = page_get_freelist( 2923749Ssusans anon_vp, (u_offset_t)0, seg, 29240Sstevel@tonic-gate addr, pgsz, 0, lgrp); 29250Sstevel@tonic-gate 29260Sstevel@tonic-gate if (pplist == NULL) { 29270Sstevel@tonic-gate page_create_putback(pg_cnt); 29280Sstevel@tonic-gate } 29290Sstevel@tonic-gate 29300Sstevel@tonic-gate /* 29310Sstevel@tonic-gate * If a request for a page of size 29320Sstevel@tonic-gate * larger than PAGESIZE failed 29330Sstevel@tonic-gate * then don't try that size anymore. 29340Sstevel@tonic-gate */ 29350Sstevel@tonic-gate if (pplist == NULL) { 29360Sstevel@tonic-gate l_szc = szc - 1; 29370Sstevel@tonic-gate } else { 29380Sstevel@tonic-gate break; 29390Sstevel@tonic-gate } 29400Sstevel@tonic-gate } 29410Sstevel@tonic-gate szc--; 29420Sstevel@tonic-gate } 29430Sstevel@tonic-gate 29440Sstevel@tonic-gate /* 29450Sstevel@tonic-gate * If just using PAGESIZE pages then don't 29460Sstevel@tonic-gate * directly allocate from the free list. 29470Sstevel@tonic-gate */ 29480Sstevel@tonic-gate if (pplist == NULL) { 29490Sstevel@tonic-gate ASSERT(szc == 0); 29500Sstevel@tonic-gate pp = anon_zero(seg, addr, &ap, cred); 29510Sstevel@tonic-gate if (pp == NULL) { 29520Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 29530Sstevel@tonic-gate panic("anon_map_createpages: anon_zero"); 29540Sstevel@tonic-gate } 29550Sstevel@tonic-gate ppa[p_index++] = pp; 29560Sstevel@tonic-gate 29570Sstevel@tonic-gate ASSERT(anon_get_ptr(amp->ahp, index) == NULL); 29580Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, index, ap, ANON_SLEEP); 29590Sstevel@tonic-gate 29600Sstevel@tonic-gate addr += PAGESIZE; 29610Sstevel@tonic-gate index++; 29620Sstevel@tonic-gate npgs--; 29630Sstevel@tonic-gate continue; 29640Sstevel@tonic-gate } 29650Sstevel@tonic-gate 29660Sstevel@tonic-gate /* 29670Sstevel@tonic-gate * pplist is a list of pg_cnt PAGESIZE pages. 29680Sstevel@tonic-gate * These pages are locked SE_EXCL since they 29690Sstevel@tonic-gate * came directly off the free list. 
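 *
 * The conpp handshake used below, in sketch form: each constituent
 * page is detached from pplist and offered to swap_getconpage() so
 * the swap layer adopts it as the identity page instead of
 * allocating a new one:
 *
 *	pp = pplist;
 *	page_sub(&pplist, pp);		(detach one constituent)
 *	conpp = pp;			(offer it to the swap layer)
 *	... swap_getconpage(ap_vp, ap_off, ..., conpp, ...) ...
 *
 * and on success anon_pl[0] == pp with nreloc == 1, as the ASSERTs
 * below check.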
29700Sstevel@tonic-gate */ 29710Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pg_cnt, pg_cnt)); 29720Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(index, pg_cnt)); 29730Sstevel@tonic-gate ASSERT(conpp == NULL); 29740Sstevel@tonic-gate while (pg_cnt--) { 29750Sstevel@tonic-gate 29760Sstevel@tonic-gate ap = anon_alloc(NULL, 0); 29770Sstevel@tonic-gate swap_xlate(ap, &ap_vp, &ap_off); 29780Sstevel@tonic-gate 29790Sstevel@tonic-gate ASSERT(pplist != NULL); 29800Sstevel@tonic-gate pp = pplist; 29810Sstevel@tonic-gate page_sub(&pplist, pp); 29820Sstevel@tonic-gate PP_CLRFREE(pp); 29830Sstevel@tonic-gate PP_CLRAGED(pp); 29840Sstevel@tonic-gate conpp = pp; 29850Sstevel@tonic-gate 29860Sstevel@tonic-gate err = swap_getconpage(ap_vp, ap_off, PAGESIZE, 29872414Saguzovsk (uint_t *)NULL, anon_pl, PAGESIZE, conpp, NULL, 29882414Saguzovsk &nreloc, seg, addr, S_CREATE, cred); 29890Sstevel@tonic-gate 29900Sstevel@tonic-gate if (err) { 29910Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 29920Sstevel@tonic-gate panic("anon_map_createpages: S_CREATE"); 29930Sstevel@tonic-gate } 29940Sstevel@tonic-gate 29950Sstevel@tonic-gate ASSERT(anon_pl[0] == pp); 29960Sstevel@tonic-gate ASSERT(nreloc == 1); 29970Sstevel@tonic-gate pagezero(pp, 0, PAGESIZE); 29980Sstevel@tonic-gate CPU_STATS_ADD_K(vm, zfod, 1); 29990Sstevel@tonic-gate hat_setrefmod(pp); 30000Sstevel@tonic-gate 30010Sstevel@tonic-gate ASSERT(anon_get_ptr(amp->ahp, index) == NULL); 30020Sstevel@tonic-gate (void) anon_set_ptr(amp->ahp, index, ap, ANON_SLEEP); 30030Sstevel@tonic-gate 30040Sstevel@tonic-gate ppa[p_index++] = pp; 30050Sstevel@tonic-gate 30060Sstevel@tonic-gate addr += PAGESIZE; 30070Sstevel@tonic-gate index++; 30080Sstevel@tonic-gate npgs--; 30090Sstevel@tonic-gate } 30100Sstevel@tonic-gate conpp = NULL; 30110Sstevel@tonic-gate pg_cnt = pgsz >> PAGESHIFT; 30120Sstevel@tonic-gate p_index = p_index - pg_cnt; 30130Sstevel@tonic-gate while (pg_cnt--) { 30140Sstevel@tonic-gate page_downgrade(ppa[p_index++]); 30150Sstevel@tonic-gate } 30160Sstevel@tonic-gate } 30170Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock); 30180Sstevel@tonic-gate return (0); 30190Sstevel@tonic-gate } 30200Sstevel@tonic-gate 30212414Saguzovsk static int 30222414Saguzovsk anon_try_demote_pages( 30232414Saguzovsk struct anon_hdr *ahp, 30242414Saguzovsk ulong_t sidx, 30252414Saguzovsk uint_t szc, 30262414Saguzovsk page_t **ppa, 30272414Saguzovsk int private) 30282414Saguzovsk { 30292414Saguzovsk struct anon *ap; 30302414Saguzovsk pgcnt_t pgcnt = page_get_pagecnt(szc); 30312414Saguzovsk page_t *pp; 30322414Saguzovsk pgcnt_t i; 30332414Saguzovsk kmutex_t *ahmpages = NULL; 30342414Saguzovsk int root = 0; 30352414Saguzovsk pgcnt_t npgs; 30362414Saguzovsk pgcnt_t curnpgs = 0; 30372414Saguzovsk size_t ppasize = 0; 30382414Saguzovsk 30392414Saguzovsk ASSERT(szc != 0); 30402414Saguzovsk ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 30412414Saguzovsk ASSERT(IS_P2ALIGNED(sidx, pgcnt)); 30422414Saguzovsk ASSERT(sidx < ahp->size); 30432414Saguzovsk 30442414Saguzovsk if (ppa == NULL) { 30452414Saguzovsk ppasize = pgcnt * sizeof (page_t *); 30462414Saguzovsk ppa = kmem_alloc(ppasize, KM_SLEEP); 30472414Saguzovsk } 30482414Saguzovsk 30492414Saguzovsk ap = anon_get_ptr(ahp, sidx); 30502414Saguzovsk if (ap != NULL && private) { 30512414Saguzovsk VM_STAT_ADD(anonvmstats.demotepages[1]); 305212173SMichael.Corcoran@Sun.COM ahmpages = APH_MUTEX(ap->an_vp, ap->an_off); 30532414Saguzovsk mutex_enter(ahmpages); 30542414Saguzovsk } 30552414Saguzovsk 30562414Saguzovsk if (ap != NULL && ap->an_refcnt > 1) { 30572414Saguzovsk if 
(ahmpages != NULL) { 30582414Saguzovsk VM_STAT_ADD(anonvmstats.demotepages[2]); 30592414Saguzovsk mutex_exit(ahmpages); 30602414Saguzovsk } 30612414Saguzovsk if (ppasize != 0) { 30622414Saguzovsk kmem_free(ppa, ppasize); 30632414Saguzovsk } 30642414Saguzovsk return (0); 30652414Saguzovsk } 30662414Saguzovsk if (ahmpages != NULL) { 30672414Saguzovsk mutex_exit(ahmpages); 30682414Saguzovsk } 30692414Saguzovsk if (ahp->size - sidx < pgcnt) { 30702414Saguzovsk ASSERT(private == 0); 30712414Saguzovsk pgcnt = ahp->size - sidx; 30722414Saguzovsk } 30732414Saguzovsk for (i = 0; i < pgcnt; i++, sidx++) { 30742414Saguzovsk ap = anon_get_ptr(ahp, sidx); 30752414Saguzovsk if (ap != NULL) { 30762414Saguzovsk if (ap->an_refcnt != 1) { 30772414Saguzovsk panic("anon_try_demote_pages: an_refcnt != 1"); 30782414Saguzovsk } 30792414Saguzovsk pp = ppa[i] = page_lookup(ap->an_vp, ap->an_off, 30805466Skchow SE_EXCL); 30812414Saguzovsk if (pp != NULL) { 30822414Saguzovsk (void) hat_pageunload(pp, 30835466Skchow HAT_FORCE_PGUNLOAD); 30842414Saguzovsk } 30852414Saguzovsk } else { 30862414Saguzovsk ppa[i] = NULL; 30872414Saguzovsk } 30882414Saguzovsk } 30892414Saguzovsk for (i = 0; i < pgcnt; i++) { 30902414Saguzovsk if ((pp = ppa[i]) != NULL && pp->p_szc != 0) { 30912414Saguzovsk ASSERT(pp->p_szc <= szc); 30922414Saguzovsk if (!root) { 30932414Saguzovsk VM_STAT_ADD(anonvmstats.demotepages[3]); 30942414Saguzovsk if (curnpgs != 0) 30952414Saguzovsk panic("anon_try_demote_pages: " 30965466Skchow "bad large page"); 30972414Saguzovsk 30982414Saguzovsk root = 1; 30992414Saguzovsk curnpgs = npgs = 31005466Skchow page_get_pagecnt(pp->p_szc); 31012414Saguzovsk 31022414Saguzovsk ASSERT(npgs <= pgcnt); 31032414Saguzovsk ASSERT(IS_P2ALIGNED(npgs, npgs)); 31045466Skchow ASSERT(!(page_pptonum(pp) & (npgs - 1))); 31052414Saguzovsk } else { 31062414Saguzovsk ASSERT(i > 0); 31072414Saguzovsk ASSERT(page_pptonum(pp) - 1 == 31085466Skchow page_pptonum(ppa[i - 1])); 31092414Saguzovsk if ((page_pptonum(pp) & (npgs - 1)) == 31105466Skchow npgs - 1) 31112414Saguzovsk root = 0; 31122414Saguzovsk } 31132414Saguzovsk ASSERT(PAGE_EXCL(pp)); 31142414Saguzovsk pp->p_szc = 0; 31152414Saguzovsk ASSERT(curnpgs > 0); 31162414Saguzovsk curnpgs--; 31172414Saguzovsk } 31182414Saguzovsk } 31192414Saguzovsk if (root != 0 || curnpgs != 0) 31202414Saguzovsk panic("anon_try_demote_pages: bad large page"); 31212414Saguzovsk 31222414Saguzovsk for (i = 0; i < pgcnt; i++) { 31232414Saguzovsk if ((pp = ppa[i]) != NULL) { 31242414Saguzovsk ASSERT(!hat_page_is_mapped(pp)); 31252414Saguzovsk ASSERT(pp->p_szc == 0); 31262414Saguzovsk page_unlock(pp); 31272414Saguzovsk } 31282414Saguzovsk } 31292414Saguzovsk if (ppasize != 0) { 31302414Saguzovsk kmem_free(ppa, ppasize); 31312414Saguzovsk } 31322414Saguzovsk return (1); 31332414Saguzovsk } 31342414Saguzovsk 31352414Saguzovsk /* 31362414Saguzovsk * anon_map_demotepages() can only be called by MAP_PRIVATE segments. 
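 *
 * The overall protocol, in rough pseudocode (the real code below
 * bounds the privatization step to a single pass):
 *
 *	while (!anon_try_demote_pages(ahp, start_idx, szc, ppa, 1)) {
 *		anon_getpage() each constituent page;
 *		anon_map_privatepages(..., anypgsz = -1, ...);
 *	}
 *
 * i.e. if any slot is still shared, privatize the whole range into
 * PAGESIZE pages and then retry the in-place demotion.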
31372414Saguzovsk */ 31380Sstevel@tonic-gate int 31390Sstevel@tonic-gate anon_map_demotepages( 31400Sstevel@tonic-gate struct anon_map *amp, 31410Sstevel@tonic-gate ulong_t start_idx, 31420Sstevel@tonic-gate struct seg *seg, 31430Sstevel@tonic-gate caddr_t addr, 31440Sstevel@tonic-gate uint_t prot, 31450Sstevel@tonic-gate struct vpage vpage[], 31460Sstevel@tonic-gate struct cred *cred) 31470Sstevel@tonic-gate { 31480Sstevel@tonic-gate struct anon *ap; 31490Sstevel@tonic-gate uint_t szc = seg->s_szc; 31500Sstevel@tonic-gate pgcnt_t pgcnt = page_get_pagecnt(szc); 31510Sstevel@tonic-gate size_t ppasize = pgcnt * sizeof (page_t *); 31520Sstevel@tonic-gate page_t **ppa = kmem_alloc(ppasize, KM_SLEEP); 31530Sstevel@tonic-gate page_t *pp; 31540Sstevel@tonic-gate page_t *pl[2]; 31550Sstevel@tonic-gate pgcnt_t i, pg_idx; 31560Sstevel@tonic-gate ulong_t an_idx; 31570Sstevel@tonic-gate caddr_t vaddr; 31580Sstevel@tonic-gate int err; 31590Sstevel@tonic-gate int retry = 0; 31600Sstevel@tonic-gate uint_t vpprot; 31610Sstevel@tonic-gate 31620Sstevel@tonic-gate ASSERT(RW_WRITE_HELD(&->a_rwlock)); 31630Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(pgcnt, pgcnt)); 31640Sstevel@tonic-gate ASSERT(IS_P2ALIGNED(start_idx, pgcnt)); 31650Sstevel@tonic-gate ASSERT(ppa != NULL); 31662414Saguzovsk ASSERT(szc != 0); 31672414Saguzovsk ASSERT(szc == amp->a_szc); 31680Sstevel@tonic-gate 31690Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[0]); 31700Sstevel@tonic-gate 31710Sstevel@tonic-gate top: 31722414Saguzovsk if (anon_try_demote_pages(amp->ahp, start_idx, szc, ppa, 1)) { 31732482Saguzovsk kmem_free(ppa, ppasize); 31740Sstevel@tonic-gate return (0); 31750Sstevel@tonic-gate } 31760Sstevel@tonic-gate 31770Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[4]); 31780Sstevel@tonic-gate 31790Sstevel@tonic-gate ASSERT(retry == 0); /* we can be here only once */ 31800Sstevel@tonic-gate 31810Sstevel@tonic-gate vaddr = addr; 31820Sstevel@tonic-gate for (pg_idx = 0, an_idx = start_idx; pg_idx < pgcnt; 31830Sstevel@tonic-gate pg_idx++, an_idx++, vaddr += PAGESIZE) { 31840Sstevel@tonic-gate ap = anon_get_ptr(amp->ahp, an_idx); 31850Sstevel@tonic-gate if (ap == NULL) 31860Sstevel@tonic-gate panic("anon_map_demotepages: no anon slot"); 31870Sstevel@tonic-gate err = anon_getpage(&ap, &vpprot, pl, PAGESIZE, seg, vaddr, 31880Sstevel@tonic-gate S_READ, cred); 31890Sstevel@tonic-gate if (err) { 31900Sstevel@tonic-gate for (i = 0; i < pg_idx; i++) { 31910Sstevel@tonic-gate if ((pp = ppa[i]) != NULL) 31920Sstevel@tonic-gate page_unlock(pp); 31930Sstevel@tonic-gate } 31940Sstevel@tonic-gate kmem_free(ppa, ppasize); 31950Sstevel@tonic-gate return (err); 31960Sstevel@tonic-gate } 31970Sstevel@tonic-gate ppa[pg_idx] = pl[0]; 31980Sstevel@tonic-gate } 31990Sstevel@tonic-gate 32000Sstevel@tonic-gate err = anon_map_privatepages(amp, start_idx, szc, seg, addr, prot, ppa, 32014426Saguzovsk vpage, -1, 0, cred); 32020Sstevel@tonic-gate if (err > 0) { 32030Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[5]); 32040Sstevel@tonic-gate kmem_free(ppa, ppasize); 32050Sstevel@tonic-gate return (err); 32060Sstevel@tonic-gate } 32070Sstevel@tonic-gate ASSERT(err == 0 || err == -1); 32080Sstevel@tonic-gate if (err == -1) { 32090Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[6]); 32100Sstevel@tonic-gate retry = 1; 32110Sstevel@tonic-gate goto top; 32120Sstevel@tonic-gate } 32130Sstevel@tonic-gate for (i = 0; i < pgcnt; i++) { 32140Sstevel@tonic-gate ASSERT(ppa[i] != NULL); 32150Sstevel@tonic-gate if (ppa[i]->p_szc != 0) 
32160Sstevel@tonic-gate retry = 1; 32170Sstevel@tonic-gate page_unlock(ppa[i]); 32180Sstevel@tonic-gate } 32190Sstevel@tonic-gate if (retry) { 32200Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[7]); 32210Sstevel@tonic-gate goto top; 32220Sstevel@tonic-gate } 32230Sstevel@tonic-gate 32240Sstevel@tonic-gate VM_STAT_ADD(anonvmstats.demotepages[8]); 32250Sstevel@tonic-gate 32260Sstevel@tonic-gate kmem_free(ppa, ppasize); 32270Sstevel@tonic-gate 32280Sstevel@tonic-gate return (0); 32290Sstevel@tonic-gate } 32300Sstevel@tonic-gate 32310Sstevel@tonic-gate /* 32322414Saguzovsk * Free pages of shared anon map. It's assumed that anon maps don't share anon 32332414Saguzovsk * structures with private anon maps. Therefore all anon structures should 32342414Saguzovsk * have at most one reference at this point. This means underlying pages can 32352414Saguzovsk * be exclusively locked and demoted or freed. If we are not freeing entire 32362414Saguzovsk * large pages, demote the ends of the region we free to be able to free 32375331Samw * subpages. Page roots correspond to aligned index positions in anon map. 32382414Saguzovsk */ 32392414Saguzovsk void 32402414Saguzovsk anon_shmap_free_pages(struct anon_map *amp, ulong_t sidx, size_t len) 32412414Saguzovsk { 32422414Saguzovsk ulong_t eidx = sidx + btopr(len); 32432414Saguzovsk pgcnt_t pages = page_get_pagecnt(amp->a_szc); 32442414Saguzovsk struct anon_hdr *ahp = amp->ahp; 32452414Saguzovsk ulong_t tidx; 32462414Saguzovsk size_t size; 32472414Saguzovsk ulong_t sidx_aligned; 32482414Saguzovsk ulong_t eidx_aligned; 32492414Saguzovsk 32506695Saguzovsk ASSERT(ANON_WRITE_HELD(&->a_rwlock)); 32512414Saguzovsk ASSERT(amp->refcnt <= 1); 32522414Saguzovsk ASSERT(amp->a_szc > 0); 32532414Saguzovsk ASSERT(eidx <= ahp->size); 32542414Saguzovsk ASSERT(!anon_share(ahp, sidx, btopr(len))); 32552414Saguzovsk 32562414Saguzovsk if (len == 0) { /* XXX */ 32572414Saguzovsk return; 32582414Saguzovsk } 32592414Saguzovsk 32602414Saguzovsk sidx_aligned = P2ALIGN(sidx, pages); 32612414Saguzovsk if (sidx_aligned != sidx || 32622414Saguzovsk (eidx < sidx_aligned + pages && eidx < ahp->size)) { 32632414Saguzovsk if (!anon_try_demote_pages(ahp, sidx_aligned, 32642414Saguzovsk amp->a_szc, NULL, 0)) { 32652414Saguzovsk panic("anon_shmap_free_pages: demote failed"); 32662414Saguzovsk } 32672414Saguzovsk size = (eidx <= sidx_aligned + pages) ?
(eidx - sidx) : 32682414Saguzovsk P2NPHASE(sidx, pages); 32692414Saguzovsk size <<= PAGESHIFT; 32702414Saguzovsk anon_free(ahp, sidx, size); 32712414Saguzovsk sidx = sidx_aligned + pages; 32722414Saguzovsk if (eidx <= sidx) { 32732414Saguzovsk return; 32742414Saguzovsk } 32752414Saguzovsk } 32762414Saguzovsk eidx_aligned = P2ALIGN(eidx, pages); 32772414Saguzovsk if (sidx < eidx_aligned) { 32782414Saguzovsk anon_free_pages(ahp, sidx, 32792414Saguzovsk (eidx_aligned - sidx) << PAGESHIFT, 32802414Saguzovsk amp->a_szc); 32812414Saguzovsk sidx = eidx_aligned; 32822414Saguzovsk } 32832414Saguzovsk ASSERT(sidx == eidx_aligned); 32842414Saguzovsk if (eidx == eidx_aligned) { 32852414Saguzovsk return; 32862414Saguzovsk } 32872414Saguzovsk tidx = eidx; 32882414Saguzovsk if (eidx != ahp->size && anon_get_next_ptr(ahp, &tidx) != NULL && 32892414Saguzovsk tidx - sidx < pages) { 32902414Saguzovsk if (!anon_try_demote_pages(ahp, sidx, amp->a_szc, NULL, 0)) { 32912414Saguzovsk panic("anon_shmap_free_pages: demote failed"); 32922414Saguzovsk } 32932414Saguzovsk size = (eidx - sidx) << PAGESHIFT; 32942414Saguzovsk anon_free(ahp, sidx, size); 32952414Saguzovsk } else { 32962414Saguzovsk anon_free_pages(ahp, sidx, pages << PAGESHIFT, amp->a_szc); 32972414Saguzovsk } 32982414Saguzovsk } 32992414Saguzovsk 33002414Saguzovsk /* 33016695Saguzovsk * This routine should be called with amp's writer lock when there are no other 33026695Saguzovsk * users of amp. All pcache entries of this amp must have already been 33036695Saguzovsk * inactivated. We must not drop a_rwlock here to prevent new users from 33046695Saguzovsk * attaching to this amp. 33056695Saguzovsk */ 33066695Saguzovsk void 33076695Saguzovsk anonmap_purge(struct anon_map *amp) 33086695Saguzovsk { 33096695Saguzovsk ASSERT(ANON_WRITE_HELD(&->a_rwlock)); 33106695Saguzovsk ASSERT(amp->refcnt <= 1); 33116695Saguzovsk 33126695Saguzovsk if (amp->a_softlockcnt != 0) { 33136695Saguzovsk seg_ppurge(NULL, amp, 0); 33146695Saguzovsk } 33156695Saguzovsk 33166695Saguzovsk /* 33176695Saguzovsk * Since all pcache entries were already inactive before this routine 33186695Saguzovsk * was called seg_ppurge() couldn't return while there are still 33196695Saguzovsk * entries that can be found via the list anchored at a_phead. So we 33206695Saguzovsk * can assert this list is empty now. a_softlockcnt may still be 33216695Saguzovsk * non-zero if the asynchronous thread that manages pcache has already 33226695Saguzovsk * removed pcache entries but hasn't unlocked the pages yet. If 33236695Saguzovsk * a_softlockcnt is non-zero we just wait on a_purgecv for 33246695Saguzovsk * shamp_reclaim() to finish. Even if a_softlockcnt is 0 we grab 33256695Saguzovsk * a_purgemtx to avoid freeing the anon map before shamp_reclaim() is 33266695Saguzovsk * done with it. a_purgemtx, also taken by shamp_reclaim() while 33276695Saguzovsk * a_softlockcnt was still non-zero, acts as a barrier that prevents 33286695Saguzovsk * anonmap_purge() from completing while shamp_reclaim() may still be referencing this amp.
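 *
 * A sketch of the reclaim side of this handshake (shamp_reclaim()
 * lives in the pcache code, not in this file; the exact bookkeeping
 * there may differ):
 *
 *	mutex_enter(&->a_purgemtx);
 *	if (amp->a_softlockcnt == 0 && amp->a_purgewait) {
 *		amp->a_purgewait = 0;
 *		cv_broadcast(&->a_purgecv);
 *	}
 *	mutex_exit(&->a_purgemtx);
 *
 * Because the wakeup is posted under a_purgemtx, the cv_wait() loop
 * below cannot miss the final transition of a_softlockcnt to 0.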
33296695Saguzovsk */ 33306695Saguzovsk ASSERT(amp->a_phead.p_lnext == &->a_phead); 33316695Saguzovsk ASSERT(amp->a_phead.p_lprev == &->a_phead); 33326695Saguzovsk 33336695Saguzovsk mutex_enter(&->a_purgemtx); 33346695Saguzovsk while (amp->a_softlockcnt != 0) { 33356695Saguzovsk ASSERT(amp->a_phead.p_lnext == &->a_phead); 33366695Saguzovsk ASSERT(amp->a_phead.p_lprev == &->a_phead); 33376695Saguzovsk amp->a_purgewait = 1; 33386695Saguzovsk cv_wait(&->a_purgecv, &->a_purgemtx); 33396695Saguzovsk } 33406695Saguzovsk mutex_exit(&->a_purgemtx); 33416695Saguzovsk 33426695Saguzovsk ASSERT(amp->a_phead.p_lnext == &->a_phead); 33436695Saguzovsk ASSERT(amp->a_phead.p_lprev == &->a_phead); 33446695Saguzovsk ASSERT(amp->a_softlockcnt == 0); 33456695Saguzovsk } 33466695Saguzovsk 33476695Saguzovsk /* 33480Sstevel@tonic-gate * Allocate and initialize an anon_map structure for seg, 33490Sstevel@tonic-gate * associating the given swap reservation with the new anon_map. 33500Sstevel@tonic-gate */ 33510Sstevel@tonic-gate struct anon_map * 33524426Saguzovsk anonmap_alloc(size_t size, size_t swresv, int flags) 33530Sstevel@tonic-gate { 33540Sstevel@tonic-gate struct anon_map *amp; 33554426Saguzovsk int kmflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP; 33564426Saguzovsk 33574426Saguzovsk amp = kmem_cache_alloc(anonmap_cache, kmflags); 33584426Saguzovsk if (amp == NULL) { 33594426Saguzovsk ASSERT(kmflags == KM_NOSLEEP); 33604426Saguzovsk return (NULL); 33614426Saguzovsk } 33624426Saguzovsk 33634426Saguzovsk amp->ahp = anon_create(btopr(size), flags); 33644426Saguzovsk if (amp->ahp == NULL) { 33654426Saguzovsk ASSERT(flags == ANON_NOSLEEP); 33664426Saguzovsk kmem_cache_free(anonmap_cache, amp); 33674426Saguzovsk return (NULL); 33684426Saguzovsk } 33690Sstevel@tonic-gate amp->refcnt = 1; 33700Sstevel@tonic-gate amp->size = size; 33710Sstevel@tonic-gate amp->swresv = swresv; 33720Sstevel@tonic-gate amp->locality = 0; 33730Sstevel@tonic-gate amp->a_szc = 0; 33742768Ssl108498 amp->a_sp = NULL; 33756695Saguzovsk amp->a_softlockcnt = 0; 33766695Saguzovsk amp->a_purgewait = 0; 33776695Saguzovsk amp->a_phead.p_lnext = &->a_phead; 33786695Saguzovsk amp->a_phead.p_lprev = &->a_phead; 33796695Saguzovsk 33800Sstevel@tonic-gate return (amp); 33810Sstevel@tonic-gate } 33820Sstevel@tonic-gate 33830Sstevel@tonic-gate void 33840Sstevel@tonic-gate anonmap_free(struct anon_map *amp) 33850Sstevel@tonic-gate { 33866695Saguzovsk ASSERT(amp->ahp != NULL); 33870Sstevel@tonic-gate ASSERT(amp->refcnt == 0); 33886695Saguzovsk ASSERT(amp->a_softlockcnt == 0); 33896695Saguzovsk ASSERT(amp->a_phead.p_lnext == &->a_phead); 33906695Saguzovsk ASSERT(amp->a_phead.p_lprev == &->a_phead); 33910Sstevel@tonic-gate 33920Sstevel@tonic-gate lgrp_shm_policy_fini(amp, NULL); 33930Sstevel@tonic-gate anon_release(amp->ahp, btopr(amp->size)); 33940Sstevel@tonic-gate kmem_cache_free(anonmap_cache, amp); 33950Sstevel@tonic-gate } 33960Sstevel@tonic-gate 33970Sstevel@tonic-gate /* 33980Sstevel@tonic-gate * Returns true if the anon array has some empty slots. 33995331Samw * The offp and lenp parameters are in/out parameters. On entry 34000Sstevel@tonic-gate * these values represent the starting offset and length of the 34010Sstevel@tonic-gate * mapping. When true is returned, these values may be modified 34020Sstevel@tonic-gate * to be the largest range which includes empty slots.
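 *
 * Illustrative use (hypothetical caller probing whether part of a
 * range is backed by the underlying object rather than by anon
 * pages):
 *
 *	u_offset_t off = <offset of the faulting range>;
 *	size_t len = <length of the faulting range>;
 *
 *	if (non_anon(amp->ahp, anon_idx, &off, &len)) {
 *		read [off, off + len) from the backing vnode;
 *	}
 *
 * When it returns true, off/len have been narrowed to the largest
 * range which includes the empty slots.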
34030Sstevel@tonic-gate */ 34040Sstevel@tonic-gate int 34050Sstevel@tonic-gate non_anon(struct anon_hdr *ahp, ulong_t anon_idx, u_offset_t *offp, 34060Sstevel@tonic-gate size_t *lenp) 34070Sstevel@tonic-gate { 34080Sstevel@tonic-gate ulong_t i, el; 34090Sstevel@tonic-gate ssize_t low, high; 34100Sstevel@tonic-gate struct anon *ap; 34110Sstevel@tonic-gate 34120Sstevel@tonic-gate low = -1; 34130Sstevel@tonic-gate for (i = 0, el = *lenp; i < el; i += PAGESIZE, anon_idx++) { 34140Sstevel@tonic-gate ap = anon_get_ptr(ahp, anon_idx); 34150Sstevel@tonic-gate if (ap == NULL) { 34160Sstevel@tonic-gate if (low == -1) 34170Sstevel@tonic-gate low = i; 34180Sstevel@tonic-gate high = i; 34190Sstevel@tonic-gate } 34200Sstevel@tonic-gate } 34210Sstevel@tonic-gate if (low != -1) { 34220Sstevel@tonic-gate /* 34230Sstevel@tonic-gate * Found at least one non-anon page. 34240Sstevel@tonic-gate * Set up the off and len return values. 34250Sstevel@tonic-gate */ 34260Sstevel@tonic-gate if (low != 0) 34270Sstevel@tonic-gate *offp += low; 34280Sstevel@tonic-gate *lenp = high - low + PAGESIZE; 34290Sstevel@tonic-gate return (1); 34300Sstevel@tonic-gate } 34310Sstevel@tonic-gate return (0); 34320Sstevel@tonic-gate } 34330Sstevel@tonic-gate 34340Sstevel@tonic-gate /* 34350Sstevel@tonic-gate * Return a count of the number of existing anon pages in the anon array 34360Sstevel@tonic-gate * ahp in the range (off, off+len). The array and slots must be guaranteed 34370Sstevel@tonic-gate * stable by the caller. 34380Sstevel@tonic-gate */ 34390Sstevel@tonic-gate pgcnt_t 34400Sstevel@tonic-gate anon_pages(struct anon_hdr *ahp, ulong_t anon_index, pgcnt_t nslots) 34410Sstevel@tonic-gate { 34420Sstevel@tonic-gate pgcnt_t cnt = 0; 34430Sstevel@tonic-gate 34440Sstevel@tonic-gate while (nslots-- > 0) { 34450Sstevel@tonic-gate if ((anon_get_ptr(ahp, anon_index)) != NULL) 34460Sstevel@tonic-gate cnt++; 34470Sstevel@tonic-gate anon_index++; 34480Sstevel@tonic-gate } 34490Sstevel@tonic-gate return (cnt); 34500Sstevel@tonic-gate } 34510Sstevel@tonic-gate 34520Sstevel@tonic-gate /* 3453*13035SOndrej.Kubecka@Sun.COM * Move reserved phys swap into memory swap (unreserve phys swap 3454*13035SOndrej.Kubecka@Sun.COM * and reserve mem swap by the same amount).
3455*13035SOndrej.Kubecka@Sun.COM * Used by segspt when it needs to lock reserved swap npages in memory. 34560Sstevel@tonic-gate */ 34570Sstevel@tonic-gate int 3458*13035SOndrej.Kubecka@Sun.COM anon_swap_adjust(pgcnt_t npages) 34590Sstevel@tonic-gate { 34600Sstevel@tonic-gate pgcnt_t unlocked_mem_swap; 34610Sstevel@tonic-gate 34620Sstevel@tonic-gate mutex_enter(&anoninfo_lock); 34630Sstevel@tonic-gate 34640Sstevel@tonic-gate ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap); 34650Sstevel@tonic-gate ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv); 34660Sstevel@tonic-gate 34670Sstevel@tonic-gate unlocked_mem_swap = k_anoninfo.ani_mem_resv 34685466Skchow - k_anoninfo.ani_locked_swap; 34690Sstevel@tonic-gate if (npages > unlocked_mem_swap) { 34700Sstevel@tonic-gate spgcnt_t adjusted_swap = npages - unlocked_mem_swap; 34710Sstevel@tonic-gate 34720Sstevel@tonic-gate /* 34730Sstevel@tonic-gate * if there is not enough unlocked mem swap we take the missing 34740Sstevel@tonic-gate * amount from phys swap and give it to mem swap 34750Sstevel@tonic-gate */ 3476*13035SOndrej.Kubecka@Sun.COM if (!page_reclaim_mem(adjusted_swap, segspt_minfree, 1)) { 34770Sstevel@tonic-gate mutex_exit(&anoninfo_lock); 34780Sstevel@tonic-gate return (ENOMEM); 34790Sstevel@tonic-gate } 34800Sstevel@tonic-gate 34810Sstevel@tonic-gate k_anoninfo.ani_mem_resv += adjusted_swap; 34820Sstevel@tonic-gate ASSERT(k_anoninfo.ani_phys_resv >= adjusted_swap); 34830Sstevel@tonic-gate k_anoninfo.ani_phys_resv -= adjusted_swap; 34840Sstevel@tonic-gate 34850Sstevel@tonic-gate ANI_ADD(adjusted_swap); 34860Sstevel@tonic-gate } 34870Sstevel@tonic-gate k_anoninfo.ani_locked_swap += npages; 34880Sstevel@tonic-gate 34890Sstevel@tonic-gate ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap); 34900Sstevel@tonic-gate ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv); 34910Sstevel@tonic-gate 34920Sstevel@tonic-gate mutex_exit(&anoninfo_lock); 34930Sstevel@tonic-gate 34940Sstevel@tonic-gate return (0); 34950Sstevel@tonic-gate } 34960Sstevel@tonic-gate 34970Sstevel@tonic-gate /* 3498*13035SOndrej.Kubecka@Sun.COM * 'Unlock' reserved mem swap so that when it is unreserved it 3499*13035SOndrej.Kubecka@Sun.COM * can be moved back to phys (disk) swap. 35000Sstevel@tonic-gate */ 35010Sstevel@tonic-gate void 35020Sstevel@tonic-gate anon_swap_restore(pgcnt_t npages) 35030Sstevel@tonic-gate { 35040Sstevel@tonic-gate mutex_enter(&anoninfo_lock); 35050Sstevel@tonic-gate 35060Sstevel@tonic-gate ASSERT(k_anoninfo.ani_locked_swap <= k_anoninfo.ani_mem_resv); 35070Sstevel@tonic-gate 35080Sstevel@tonic-gate ASSERT(k_anoninfo.ani_locked_swap >= npages); 35090Sstevel@tonic-gate k_anoninfo.ani_locked_swap -= npages; 35100Sstevel@tonic-gate 35110Sstevel@tonic-gate ASSERT(k_anoninfo.ani_locked_swap <= k_anoninfo.ani_mem_resv); 35120Sstevel@tonic-gate 35130Sstevel@tonic-gate mutex_exit(&anoninfo_lock); 35140Sstevel@tonic-gate } 35150Sstevel@tonic-gate 35160Sstevel@tonic-gate /* 35170Sstevel@tonic-gate * Return the pointer from the list for a 35180Sstevel@tonic-gate * specified anon index. 35190Sstevel@tonic-gate */ 35200Sstevel@tonic-gate ulong_t * 35210Sstevel@tonic-gate anon_get_slot(struct anon_hdr *ahp, ulong_t an_idx) 35220Sstevel@tonic-gate { 35230Sstevel@tonic-gate struct anon **app; 35240Sstevel@tonic-gate void **ppp; 35250Sstevel@tonic-gate 35260Sstevel@tonic-gate ASSERT(an_idx < ahp->size); 35270Sstevel@tonic-gate 35280Sstevel@tonic-gate /* 35290Sstevel@tonic-gate * Single level case.
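 *
 * (For the 2-level case further below, the index resolves in two
 * steps, assuming ANON_CHUNK_SIZE slots per chunk:
 *
 *	chunk = an_idx >> ANON_CHUNK_SHIFT;	(which chunk pointer)
 *	slot  = an_idx & ANON_CHUNK_OFF;	(offset within the chunk)
 *
 * giving ahp->array_chunk[chunk][slot], with chunks allocated lazily
 * under serial_lock.)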
35300Sstevel@tonic-gate */ 35310Sstevel@tonic-gate if ((ahp->size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) { 35320Sstevel@tonic-gate return ((ulong_t *)&ahp->array_chunk[an_idx]); 35330Sstevel@tonic-gate } else { 35340Sstevel@tonic-gate 35350Sstevel@tonic-gate /* 35360Sstevel@tonic-gate * 2 level case. 35370Sstevel@tonic-gate */ 35380Sstevel@tonic-gate ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT]; 35390Sstevel@tonic-gate if (*ppp == NULL) { 35400Sstevel@tonic-gate mutex_enter(&ahp->serial_lock); 35410Sstevel@tonic-gate ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT]; 35420Sstevel@tonic-gate if (*ppp == NULL) 35430Sstevel@tonic-gate *ppp = kmem_zalloc(PAGESIZE, KM_SLEEP); 35440Sstevel@tonic-gate mutex_exit(&ahp->serial_lock); 35450Sstevel@tonic-gate } 35460Sstevel@tonic-gate app = *ppp; 35470Sstevel@tonic-gate return ((ulong_t *)&app[an_idx & ANON_CHUNK_OFF]); 35480Sstevel@tonic-gate } 35490Sstevel@tonic-gate } 35500Sstevel@tonic-gate 35510Sstevel@tonic-gate void 35520Sstevel@tonic-gate anon_array_enter(struct anon_map *amp, ulong_t an_idx, anon_sync_obj_t *sobj) 35530Sstevel@tonic-gate { 35540Sstevel@tonic-gate ulong_t *ap_slot; 35550Sstevel@tonic-gate kmutex_t *mtx; 35560Sstevel@tonic-gate kcondvar_t *cv; 35570Sstevel@tonic-gate int hash; 35580Sstevel@tonic-gate 35590Sstevel@tonic-gate /* 35600Sstevel@tonic-gate * Use szc to determine the anon slot(s) that should appear atomic. 35610Sstevel@tonic-gate * If szc = 0, then lock the anon slot and mark it busy. 35620Sstevel@tonic-gate * If szc > 0, then lock the range of slots by getting the 35630Sstevel@tonic-gate * anon_array_lock for the first anon slot, and mark only the 35640Sstevel@tonic-gate * first anon slot busy to represent the whole range being busy. 35650Sstevel@tonic-gate */ 35660Sstevel@tonic-gate 35670Sstevel@tonic-gate ASSERT(RW_READ_HELD(&->a_rwlock)); 35680Sstevel@tonic-gate an_idx = P2ALIGN(an_idx, page_get_pagecnt(amp->a_szc)); 35690Sstevel@tonic-gate hash = ANON_ARRAY_HASH(amp, an_idx); 35700Sstevel@tonic-gate sobj->sync_mutex = mtx = &anon_array_lock[hash].pad_mutex; 35710Sstevel@tonic-gate sobj->sync_cv = cv = &anon_array_cv[hash]; 35720Sstevel@tonic-gate mutex_enter(mtx); 35730Sstevel@tonic-gate ap_slot = anon_get_slot(amp->ahp, an_idx); 35740Sstevel@tonic-gate while (ANON_ISBUSY(ap_slot)) 35750Sstevel@tonic-gate cv_wait(cv, mtx); 35760Sstevel@tonic-gate ANON_SETBUSY(ap_slot); 35770Sstevel@tonic-gate sobj->sync_data = ap_slot; 35780Sstevel@tonic-gate mutex_exit(mtx); 35790Sstevel@tonic-gate } 35800Sstevel@tonic-gate 3581888Scwb int 3582888Scwb anon_array_try_enter(struct anon_map *amp, ulong_t an_idx, 3583888Scwb anon_sync_obj_t *sobj) 3584888Scwb { 3585888Scwb ulong_t *ap_slot; 3586888Scwb kmutex_t *mtx; 3587888Scwb int hash; 3588888Scwb 3589888Scwb /* 3590888Scwb * Try to lock a range of anon slots. 3591888Scwb * Use szc to determine the anon slot(s) that should appear atomic. 3592888Scwb * If szc = 0, then lock the anon slot and mark it busy. 3593888Scwb * If szc > 0, then lock the range of slots by getting the 3594888Scwb * anon_array_lock for the first anon slot, and mark only the 3595888Scwb * first anon slot busy to represent the whole range being busy. 3596888Scwb * Fail if the mutex or the anon_array is busy.
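 *
 * A typical non-blocking caller sketch (hypothetical; a real caller
 * decides what locks to drop before falling back to the blocking
 * entry point):
 *
 *	anon_sync_obj_t cookie;
 *
 *	if (anon_array_try_enter(amp, an_idx, &cookie) == EWOULDBLOCK) {
 *		drop conflicting locks;
 *		anon_array_enter(amp, an_idx, &cookie);
 *	}
 *	... operate on the busy-marked slot(s) ...
 *	anon_array_exit(&cookie);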
3597888Scwb */ 3598888Scwb 3599888Scwb ASSERT(RW_READ_HELD(&->a_rwlock)); 3600888Scwb an_idx = P2ALIGN(an_idx, page_get_pagecnt(amp->a_szc)); 3601888Scwb hash = ANON_ARRAY_HASH(amp, an_idx); 3602888Scwb sobj->sync_mutex = mtx = &anon_array_lock[hash].pad_mutex; 3603925Scwb sobj->sync_cv = &anon_array_cv[hash]; 3604888Scwb if (!mutex_tryenter(mtx)) { 3605888Scwb return (EWOULDBLOCK); 3606888Scwb } 3607888Scwb ap_slot = anon_get_slot(amp->ahp, an_idx); 3608888Scwb if (ANON_ISBUSY(ap_slot)) { 3609888Scwb mutex_exit(mtx); 3610888Scwb return (EWOULDBLOCK); 3611888Scwb } 3612888Scwb ANON_SETBUSY(ap_slot); 3613888Scwb sobj->sync_data = ap_slot; 3614888Scwb mutex_exit(mtx); 3615888Scwb return (0); 3616888Scwb } 3617888Scwb 36180Sstevel@tonic-gate void 36190Sstevel@tonic-gate anon_array_exit(anon_sync_obj_t *sobj) 36200Sstevel@tonic-gate { 36210Sstevel@tonic-gate mutex_enter(sobj->sync_mutex); 36220Sstevel@tonic-gate ASSERT(ANON_ISBUSY(sobj->sync_data)); 36230Sstevel@tonic-gate ANON_CLRBUSY(sobj->sync_data); 36240Sstevel@tonic-gate if (CV_HAS_WAITERS(sobj->sync_cv)) 36250Sstevel@tonic-gate cv_broadcast(sobj->sync_cv); 36260Sstevel@tonic-gate mutex_exit(sobj->sync_mutex); 36270Sstevel@tonic-gate }
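/*
 * End-to-end lifecycle sketch for an anon_map (illustrative only;
 * error handling, lock ordering, and swap reservation policy are
 * elided, and the teardown shown is modeled on typical segment
 * driver usage rather than taken verbatim from one):
 *
 *	amp = anonmap_alloc(size, swresv, ANON_SLEEP);
 *	... populate amp->ahp via anon_set_ptr()/anon_get_ptr() ...
 *	ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
 *	if (--amp->refcnt == 0) {
 *		anonmap_purge(amp);	(writer lock held, no other users)
 *		... free the anon slots ...
 *		ANON_LOCK_EXIT(&->a_rwlock);
 *		anonmap_free(amp);
 *	} else {
 *		ANON_LOCK_EXIT(&->a_rwlock);
 *	}
 */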