/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/machsystm.h>	/* for page_freelist_coalesce() */
#include <sys/errno.h>
#include <sys/memnode.h>
#include <sys/memlist.h>
#include <sys/memlist_impl.h>
#include <sys/tuneable.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/vm.h>
#include <sys/callb.h>
#include <sys/memlist_plat.h>	/* for installed_top_size() */
#include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
#include <sys/dumphdr.h>	/* for dump_resize() */
#include <sys/atomic.h>		/* for use in stats collection */
#include <sys/rwlock.h>
#include <sys/cpuvar.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/page.h>
#define	SUNDDI_IMPL	/* so sunddi.h will not redefine splx() et al */
#include <sys/sunddi.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <sys/lgrp.h>
#include <sys/ddi.h>
#include <sys/modctl.h>

extern void memlist_read_lock(void);
extern void memlist_read_unlock(void);
extern void memlist_write_lock(void);
extern void memlist_write_unlock(void);
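/*
 * Overview (descriptive note added for clarity, not part of the original
 * header): this file implements dynamic reconfiguration of physical memory.
 * The add side is kphysm_add_memory_dynamic().  The delete side is driven
 * through an opaque handle: kphysm_del_gethandle() allocates one,
 * kphysm_del_span() attaches spans to it, kphysm_del_span_query() reports
 * relocatability, and kphysm_del_release() gives the handle back.
 */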
extern struct memlist *phys_avail;

extern void mem_node_add(pfn_t, pfn_t);
extern void mem_node_del(pfn_t, pfn_t);

extern uint_t page_ctrs_adjust(int);
static void kphysm_setup_post_add(pgcnt_t);
static int kphysm_setup_pre_del(pgcnt_t);
static void kphysm_setup_post_del(pgcnt_t, int);

static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs);

static int delspan_reserve(pfn_t, pgcnt_t);
static void delspan_unreserve(pfn_t, pgcnt_t);

static kmutex_t memseg_lists_lock;
static struct memseg *memseg_va_avail;
static struct memseg *memseg_delete_junk;
static struct memseg *memseg_edit_junk;
void memseg_remap_init(void);
static void memseg_remap_to_dummy(caddr_t, pgcnt_t);
static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t);
static struct memseg *memseg_reuse(pgcnt_t);

static struct kmem_cache *memseg_cache;

/*
 * Add a chunk of memory to the system.  page_t's for this memory
 * are allocated in the first few pages of the chunk.
 * base: starting PAGESIZE page of new memory.
 * npgs: length in PAGESIZE pages.
 *
 * Adding mem this way doesn't increase the size of the hash tables;
 * growing them would be too hard.  This should be OK, but adding memory
 * dynamically most likely means more hash misses, since the tables will
 * be smaller than they otherwise would be.
 */
int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
	page_t *pp;
	page_t *opp, *oepp;
	struct memseg *seg;
	uint64_t avmem;
	pfn_t pfn;
	pfn_t pt_base = base;
	pgcnt_t tpgs = npgs;
	pgcnt_t metapgs;
	int exhausted;
	pfn_t pnum;
	int mnode;
	caddr_t vaddr;
	int reuse;
	int mlret;
	void *mapva;
	pgcnt_t nkpmpgs = 0;
	offset_t kpm_pages_off;

	cmn_err(CE_CONT,
	    "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n",
	    npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT);
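	/*
	 * Note: the "?" prefix in the format string above asks cmn_err(9F)
	 * to log the message but print it on the console only when the
	 * system is booted verbose.
	 */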
	/*
	 * Add this span in the delete list to prevent interactions.
	 */
	if (!delspan_reserve(base, npgs)) {
		return (KPHYSM_ESPAN);
	}
	/*
	 * Check to see if any of the memory span has been added
	 * by trying an add to the installed memory list.  This
	 * forms the interlocking process for add.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	if (mlret == MEML_SPANOP_OK)
		installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();

	if (mlret != MEML_SPANOP_OK) {
		if (mlret == MEML_SPANOP_EALLOC) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		} else
		if (mlret == MEML_SPANOP_ESPAN) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ESPAN);
		} else {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		}
	}

	/*
	 * We store the page_t's for this new memory in the first
	 * few pages of the chunk.  Here, we go and get'em ...
	 */

	/*
	 * The expression after the '-' gives the number of pages
	 * that will fit in the new memory based on a requirement
	 * of (PAGESIZE + sizeof (page_t)) bytes per page.
	 */
	metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) /
	    (PAGESIZE + sizeof (page_t)));

	npgs -= metapgs;
	base += metapgs;

	ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);

	exhausted = (metapgs == 0 || npgs == 0);
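	/*
	 * Worked example of the metapgs arithmetic above (illustrative
	 * only; the real numbers depend on the platform page size and on
	 * sizeof (page_t)): with 8K pages and, say, a 120-byte page_t, a
	 * 131072-page (1GB) span can describe
	 * 131072 * 8192 / (8192 + 120) = 129179 usable pages, so
	 * metapgs = 131072 - 129179 = 1893 pages (about 1.4% of the span)
	 * are set aside for page_t metadata.  The kpm-enabled path below
	 * recomputes metapgs to make room for kpm metadata as well.
	 */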
	if (kpm_enable && !exhausted) {
		pgcnt_t start, end, nkpmpgs_prelim;
		size_t ptsz;

		/*
		 * A viable kpm large page mapping must not overlap two
		 * dynamic memsegs.  Therefore the total size is checked
		 * to be at least kpm_pgsz and also whether start and end
		 * points are at least kpm_pgsz aligned.
		 */
		if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) ||
		    pmodkpmp(base + npgs)) {

			kphysm_addmem_error_undospan(pt_base, tpgs);

			/*
			 * There is no specific error code for violating
			 * kpm granularity constraints.
			 */
			return (KPHYSM_ENOTVIABLE);
		}

		start = kpmptop(ptokpmp(base));
		end = kpmptop(ptokpmp(base + npgs));
		nkpmpgs_prelim = ptokpmp(end - start);
		ptsz = npgs * sizeof (page_t);
		metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ);
		exhausted = (tpgs <= metapgs);
		if (!exhausted) {
			npgs = tpgs - metapgs;
			base = pt_base + metapgs;

			/* final nkpmpgs */
			start = kpmptop(ptokpmp(base));
			nkpmpgs = ptokpmp(end - start);
			kpm_pages_off = ptsz +
			    (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ;
		}
	}

	/*
	 * Is the memory area supplied too small?
	 */
	if (exhausted) {
		kphysm_addmem_error_undospan(pt_base, tpgs);

		/*
		 * There is no specific error code for 'too small'.
		 */
		return (KPHYSM_ERESOURCE);
	}

	/*
	 * We may re-use a previously allocated VA space for the page_ts
	 * eventually, but we need to initialize and lock the pages first.
	 */

	/*
	 * Get an address in the kernel address map, map
	 * the page_t pages and see if we can touch them.
	 */

	mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP);
	if (mapva == NULL) {
		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't allocate VA for page_ts");

		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}
	pp = mapva;

	if (physmax < (pt_base + tpgs))
		physmax = (pt_base + tpgs);

	/*
	 * In the remapping code we map one page at a time so we must do
	 * the same here to match mapping sizes.
	 */
	pfn = pt_base;
	vaddr = (caddr_t)pp;
	for (pnum = 0; pnum < metapgs; pnum++) {
		hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
		pfn++;
		vaddr += ptob(1);
	}

	if (ddi_peek32((dev_info_t *)NULL,
	    (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) {

		cmn_err(CE_PANIC, "kphysm_add_memory_dynamic:"
		    " Can't access pp array at 0x%p [phys 0x%lx]",
		    (void *)pp, pt_base);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));

		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_EFAULT);
	}

	/*
	 * Add this memory slice to its memory node translation.
	 *
	 * Note that right now, each node may have only one slice;
	 * this may change with COD or in larger SSM systems with
	 * nested latency groups, so we must not assume that the
	 * node does not yet exist.
	 */
	pnum = base + npgs - 1;
	mem_node_add_slice(base, pnum);

	/*
	 * Allocate or resize page counters as necessary to accommodate
	 * the increase in memory pages.
	 */
	mnode = PFN_2_MEM_NODE(pnum);
	if (page_ctrs_adjust(mnode) != 0) {

		mem_node_pre_del_slice(base, pnum);
		mem_node_post_del_slice(base, pnum, 0);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));

		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}

	/*
	 * Update the phys_avail memory list.
	 * The phys_install list was done at the start.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT,
	    (uint64_t)(npgs) << PAGESHIFT, &phys_avail);
	ASSERT(mlret == MEML_SPANOP_OK);

	memlist_write_unlock();
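	/*
	 * Note that phys_install was given the whole span
	 * [pt_base, pt_base + tpgs) at the start, while phys_avail only
	 * receives [base, base + npgs): the leading metapgs pages now hold
	 * page_t metadata and are never available for allocation.
	 */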
	/* See if we can find a memseg to re-use. */
	seg = memseg_reuse(metapgs);

	reuse = (seg != NULL);

	/*
	 * Initialize the memseg structure representing this memory
	 * and add it to the existing list of memsegs.  Do some basic
	 * initialization and add the memory to the system.
	 * In order to prevent lock deadlocks, the add_physmem()
	 * code is repeated here, but split into several stages.
	 */
	if (seg == NULL) {
		seg = kmem_cache_alloc(memseg_cache, KM_SLEEP);
		bzero(seg, sizeof (struct memseg));
		seg->msegflags = MEMSEG_DYNAMIC;
		seg->pages = pp;
	} else {
		/*EMPTY*/
		ASSERT(seg->msegflags & MEMSEG_DYNAMIC);
	}

	seg->epages = seg->pages + npgs;
	seg->pages_base = base;
	seg->pages_end = base + npgs;

	/*
	 * Initialize metadata.  The page_ts are set to locked state
	 * ready to be freed.
	 */
	bzero((caddr_t)pp, ptob(metapgs));

	pfn = seg->pages_base;
	/* Save the original pp base in case we reuse a memseg. */
	opp = pp;
	oepp = opp + npgs;
	for (pp = opp; pp < oepp; pp++) {
		pp->p_pagenum = pfn;
		pfn++;
		page_iolock_init(pp);
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		pp->p_offset = (u_offset_t)-1;
	}

	if (reuse) {
		/* Remap our page_ts to the re-used memseg VA space. */
		pfn = pt_base;
		vaddr = (caddr_t)seg->pages;
		for (pnum = 0; pnum < metapgs; pnum++) {
			hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST);
			pfn++;
			vaddr += ptob(1);
		}

		hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
	}

	hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off);

	memsegs_lock(1);
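	/*
	 * Ordering note for the insertion sequence below: readers may
	 * traverse the memsegs list without taking memsegs_lock(), so the
	 * new memseg must be fully initialized and linked (seg->next)
	 * before it is published.  The membar_producer() guarantees that
	 * those stores are globally visible before the store to memsegs
	 * itself.
	 */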
	/*
	 * The new memseg is inserted at the beginning of the list.
	 * Not only does this save searching for the tail, but in the
	 * case of a re-used memseg, it solves the problem of what
	 * happens if some process still has a pointer to the
	 * memseg and follows the next pointer to continue traversing
	 * the memsegs list.
	 */

	hat_kpm_addmem_mseg_insert(seg);

	seg->next = memsegs;
	membar_producer();

	hat_kpm_addmem_memsegs_update(seg);

	memsegs = seg;

	build_pfn_hash();

	total_pages += npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	/*
	 * Free the pages outside the lock to avoid locking loops.
	 */
	for (pp = seg->pages; pp < seg->epages; pp++) {
		page_free(pp, 1);
	}

	/*
	 * Now that we've updated the appropriate memory lists we
	 * need to reset a number of globals, since we've increased memory.
	 * Several have already been updated for us as noted above. The
	 * globals we're interested in at this point are:
	 *	physmax - highest page frame number.
	 *	physinstalled - number of pages currently installed (done
	 *	    earlier)
	 *	maxmem - max free pages in the system
	 *	physmem - physical memory pages available
	 *	availrmem - real memory available
	 */

	mutex_enter(&freemem_lock);
	maxmem += npgs;
	physmem += npgs;
	availrmem += npgs;
	availrmem_initial += npgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	page_freelist_coalesce_all(mnode);

	kphysm_setup_post_add(npgs);

	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	delspan_unreserve(pt_base, tpgs);
	return (KPHYSM_OK);		/* Successfully added system memory */

}

/*
 * There are various error conditions in kphysm_add_memory_dynamic()
 * which require a rollback of already changed global state.
 */
static void
kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs)
{
	int mlret;

	/* Unreserve memory span. */
	memlist_write_lock();

	mlret = memlist_delete_span(
	    (uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	ASSERT(mlret == MEML_SPANOP_OK);
	phys_install_has_changed();
	installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();
	delspan_unreserve(pt_base, tpgs);
}
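/*
 * Note the rollback order above: the span is first deleted from
 * phys_install and physmax/physinstalled are recomputed, and only then is
 * the delspan reservation dropped, the reverse of the setup order in
 * kphysm_add_memory_dynamic().
 */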
/*
 * Only return an available memseg of exactly the right size.
 * When the metadata area has its own virtual address space
 * we will need to manage this more carefully and do best fit
 * allocations, possibly splitting an available area.
 */
static struct memseg *
memseg_reuse(pgcnt_t metapgs)
{
	struct memseg **segpp, *seg;

	mutex_enter(&memseg_lists_lock);

	segpp = &memseg_va_avail;
	for (; (seg = *segpp) != NULL; segpp = &seg->lnext) {
		caddr_t end;

		if (kpm_enable)
			end = hat_kpm_mseg_reuse(seg);
		else
			end = (caddr_t)seg->epages;

		if (btopr(end - (caddr_t)seg->pages) == metapgs) {
			*segpp = seg->lnext;
			seg->lnext = NULL;
			break;
		}
	}
	mutex_exit(&memseg_lists_lock);

	return (seg);
}

static uint_t handle_gen;

struct memdelspan {
	struct memdelspan *mds_next;
	pfn_t		mds_base;
	pgcnt_t		mds_npgs;
	uint_t		*mds_bitmap;
	uint_t		*mds_bitmap_retired;
};

#define	NBPBMW		(sizeof (uint_t) * NBBY)
#define	MDS_BITMAPBYTES(MDSP) \
	((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t))
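/*
 * Example of the bitmap sizing (assuming a 32-bit uint_t, so NBPBMW == 32):
 * a delspan of 100 pages needs (100 + 31) / 32 = 4 words, i.e.
 * MDS_BITMAPBYTES() == 16 bytes, one bit per page in the span.
 */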
struct transit_list {
	struct transit_list	*trl_next;
	struct memdelspan	*trl_spans;
	int			trl_collect;
};

struct transit_list_head {
	kmutex_t		trh_lock;
	struct transit_list	*trh_head;
};

static struct transit_list_head transit_list_head;

struct mem_handle;
static void transit_list_collect(struct mem_handle *, int);
static void transit_list_insert(struct transit_list *);
static void transit_list_remove(struct transit_list *);

#ifdef DEBUG
#define	MEM_DEL_STATS
#endif /* DEBUG */

#ifdef MEM_DEL_STATS
static int mem_del_stat_print = 0;
struct mem_del_stat {
	uint_t	nloop;
	uint_t	need_free;
	uint_t	free_loop;
	uint_t	free_low;
	uint_t	free_failed;
	uint_t	ncheck;
	uint_t	nopaget;
	uint_t	lockfail;
	uint_t	nfree;
	uint_t	nreloc;
	uint_t	nrelocfail;
	uint_t	already_done;
	uint_t	first_notfree;
	uint_t	npplocked;
	uint_t	nlockreloc;
	uint_t	nnorepl;
	uint_t	nmodreloc;
	uint_t	ndestroy;
	uint_t	nputpage;
	uint_t	nnoreclaim;
	uint_t	ndelay;
	uint_t	demotefail;
	uint64_t nticks_total;
	uint64_t nticks_pgrp;
	uint_t	retired;
	uint_t	toxic;
	uint_t	failing;
	uint_t	modtoxic;
	uint_t	npplkdtoxic;
	uint_t	gptlmodfail;
	uint_t	gptllckfail;
};
/*
 * The stat values are only incremented in the delete thread
 * so no locking or atomic operations are required.
 */
#define	MDSTAT_INCR(MHP, FLD)	(MHP)->mh_delstat.FLD++
#define	MDSTAT_TOTAL(MHP, ntck)	((MHP)->mh_delstat.nticks_total += (ntck))
#define	MDSTAT_PGRP(MHP, ntck)	((MHP)->mh_delstat.nticks_pgrp += (ntck))
static void mem_del_stat_print_func(struct mem_handle *);
#define	MDSTAT_PRINT(MHP)	mem_del_stat_print_func((MHP))
#else /* MEM_DEL_STATS */
#define	MDSTAT_INCR(MHP, FLD)
#define	MDSTAT_TOTAL(MHP, ntck)
#define	MDSTAT_PGRP(MHP, ntck)
#define	MDSTAT_PRINT(MHP)
#endif /* MEM_DEL_STATS */

typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING,
	MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t;

/*
 * mh_mutex must be taken to examine or change mh_exthandle and mh_state.
 * The mutex may not be required for other fields, dependent on mh_state.
 */
struct mem_handle {
	kmutex_t	mh_mutex;
	struct mem_handle *mh_next;
	memhandle_t	mh_exthandle;
	mhnd_state_t	mh_state;
	struct transit_list mh_transit;
	pgcnt_t		mh_phys_pages;
	pgcnt_t		mh_vm_pages;
	pgcnt_t		mh_hold_todo;
	void		(*mh_delete_complete)(void *, int error);
	void		*mh_delete_complete_arg;
	volatile uint_t mh_cancel;
	volatile uint_t mh_dr_aio_cleanup_cancel;
	volatile uint_t mh_aio_cleanup_done;
	kcondvar_t	mh_cv;
	kthread_id_t	mh_thread_id;
	page_t		*mh_deleted;	/* link through p_next */
#ifdef MEM_DEL_STATS
	struct mem_del_stat mh_delstat;
#endif /* MEM_DEL_STATS */
};

static struct mem_handle *mem_handle_head;
static kmutex_t mem_handle_list_mutex;

static struct mem_handle *
kphysm_allocate_mem_handle()
{
	struct mem_handle *mhp;

	mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP);
	mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&mem_handle_list_mutex);
	mutex_enter(&mhp->mh_mutex);
	/* handle_gen is protected by list mutex. */
	mhp->mh_exthandle = (memhandle_t)(++handle_gen);
	mhp->mh_next = mem_handle_head;
	mem_handle_head = mhp;
	mutex_exit(&mem_handle_list_mutex);

	return (mhp);
}

static void
kphysm_free_mem_handle(struct mem_handle *mhp)
{
	struct mem_handle **mhpp;

	ASSERT(mutex_owned(&mhp->mh_mutex));
	ASSERT(mhp->mh_state == MHND_FREE);
	/*
	 * Exit the mutex to preserve locking order.  This is OK
	 * here as once in the FREE state, the handle cannot
	 * be found by a lookup.
	 */
	mutex_exit(&mhp->mh_mutex);

	mutex_enter(&mem_handle_list_mutex);
	mhpp = &mem_handle_head;
	while (*mhpp != NULL && *mhpp != mhp)
		mhpp = &(*mhpp)->mh_next;
	ASSERT(*mhpp == mhp);
	/*
	 * No need to lock the handle (mh_mutex) as only
	 * mh_next is changing and this is the only thread that
	 * can be referencing mhp.
	 */
	*mhpp = mhp->mh_next;
	mutex_exit(&mem_handle_list_mutex);

	mutex_destroy(&mhp->mh_mutex);
	kmem_free(mhp, sizeof (struct mem_handle));
}

/*
 * This function finds the internal mem_handle corresponding to an
 * external handle and returns it with the mh_mutex held.
 */
static struct mem_handle *
kphysm_lookup_mem_handle(memhandle_t handle)
{
	struct mem_handle *mhp;

	mutex_enter(&mem_handle_list_mutex);
	for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) {
		if (mhp->mh_exthandle == handle) {
			mutex_enter(&mhp->mh_mutex);
			/*
			 * The state of the handle could have been changed
			 * by kphysm_del_release() while waiting for mh_mutex.
			 */
			if (mhp->mh_state == MHND_FREE) {
				mutex_exit(&mhp->mh_mutex);
				continue;
			}
			break;
		}
	}
	mutex_exit(&mem_handle_list_mutex);
	return (mhp);
}

int
kphysm_del_gethandle(memhandle_t *xmhp)
{
	struct mem_handle *mhp;

	mhp = kphysm_allocate_mem_handle();
	/*
	 * The handle is allocated using KM_SLEEP, so cannot fail.
	 * If the implementation is changed, the correct error to return
	 * here would be KPHYSM_ENOHANDLES.
	 */
	ASSERT(mhp->mh_state == MHND_FREE);
	mhp->mh_state = MHND_INIT;
	*xmhp = mhp->mh_exthandle;
	mutex_exit(&mhp->mh_mutex);
	return (KPHYSM_OK);
}
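/*
 * Sketch of the intended calling sequence for the delete interface
 * (illustrative; kphysm_del_start() appears later in this file, and the
 * legal orderings are those listed above kphysm_del_release() below):
 *
 *	memhandle_t h;
 *
 *	if (kphysm_del_gethandle(&h) != KPHYSM_OK)
 *		return;
 *	if (kphysm_del_span(h, base, npgs) == KPHYSM_OK) {
 *		(start the delete, or query, as required)
 *	}
 *	(void) kphysm_del_release(h);
 */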
static int
overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2)
{
	pfn_t e1, e2;

	e1 = b1 + l1;
	e2 = b2 + l2;

	return (!(b2 >= e1 || b1 >= e2));
}

static int can_remove_pgs(pgcnt_t);

static struct memdelspan *
span_to_install(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	uint64_t address, size, thislen;
	struct memlist *mlp;

	mdsp_new = NULL;

	address = (uint64_t)base << PAGESHIFT;
	size = (uint64_t)npgs << PAGESHIFT;
	while (size != 0) {
		memlist_read_lock();
		for (mlp = phys_install; mlp != NULL; mlp = mlp->next) {
			if (address >= (mlp->address + mlp->size))
				continue;
			if ((address + size) > mlp->address)
				break;
		}
		if (mlp == NULL) {
			address += size;
			size = 0;
			thislen = 0;
		} else {
			if (address < mlp->address) {
				size -= (mlp->address - address);
				address = mlp->address;
			}
			ASSERT(address >= mlp->address);
			if ((address + size) > (mlp->address + mlp->size)) {
				thislen = mlp->size - (address - mlp->address);
			} else {
				thislen = size;
			}
		}
		memlist_read_unlock();
		/* TODO: phys_install could change now */
		if (thislen == 0)
			continue;
		mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
		mdsp->mds_base = btop(address);
		mdsp->mds_npgs = btop(thislen);
		mdsp->mds_next = mdsp_new;
		mdsp_new = mdsp;
		address += thislen;
		size -= thislen;
	}
	return (mdsp_new);
}
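/*
 * Note: span_to_install() returns the intersection of [base, base + npgs)
 * with phys_install as a list of memdelspans.  Because each new element is
 * prepended, the list is in reverse discovery order; that is fine, since
 * no list ordering is relied upon (see delspan_concat() below).
 */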
static void
free_delspans(struct memdelspan *mdsp)
{
	struct memdelspan *amdsp;

	while ((amdsp = mdsp) != NULL) {
		mdsp = amdsp->mds_next;
		kmem_free(amdsp, sizeof (struct memdelspan));
	}
}

/*
 * Concatenate lists.  No list ordering is required.
 */

static void
delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp)
{
	while (*mdspp != NULL)
		mdspp = &(*mdspp)->mds_next;

	*mdspp = mdsp;
}

/*
 * Given a new list of delspans, check there is no overlap with
 * all existing span activity (add or delete) and then concatenate
 * the new spans to the given list.
 * Return 1 for OK, 0 if overlapping.
 */
static int
delspan_insert(
	struct transit_list *my_tlp,
	struct memdelspan *mdsp_new)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);
	ASSERT(mdsp_new != NULL);

	ret = 1;
	mutex_enter(&trh->trh_lock);
	/* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */
	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			struct memdelspan *nmdsp;

			for (nmdsp = mdsp_new; nmdsp != NULL;
			    nmdsp = nmdsp->mds_next) {
				if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
				    nmdsp->mds_base, nmdsp->mds_npgs)) {
					ret = 0;
					goto done;
				}
			}
		}
	}
done:
	if (ret != 0) {
		if (my_tlp->trl_spans == NULL)
			transit_list_insert(my_tlp);
		delspan_concat(&my_tlp->trl_spans, mdsp_new);
	}
	mutex_exit(&trh->trh_lock);
	return (ret);
}
static void
delspan_remove(
	struct transit_list *my_tlp,
	pfn_t base,
	pgcnt_t npgs)
{
	struct transit_list_head *trh;
	struct memdelspan *mdsp;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);

	mutex_enter(&trh->trh_lock);
	if ((mdsp = my_tlp->trl_spans) != NULL) {
		if (npgs == 0) {
			my_tlp->trl_spans = NULL;
			free_delspans(mdsp);
			transit_list_remove(my_tlp);
		} else {
			struct memdelspan **prv;

			prv = &my_tlp->trl_spans;
			while (mdsp != NULL) {
				pfn_t p_end;

				p_end = mdsp->mds_base + mdsp->mds_npgs;
				if (mdsp->mds_base >= base &&
				    p_end <= (base + npgs)) {
					*prv = mdsp->mds_next;
					mdsp->mds_next = NULL;
					free_delspans(mdsp);
				} else {
					prv = &mdsp->mds_next;
				}
				mdsp = *prv;
			}
			if (my_tlp->trl_spans == NULL)
				transit_list_remove(my_tlp);
		}
	}
	mutex_exit(&trh->trh_lock);
}

/*
 * Reserve interface for add to stop delete before add has finished.
 * This list is only accessed through the delspan_insert/remove
 * functions and so is fully protected by the mutex in struct transit_list.
 */

static struct transit_list reserve_transit;

static int
delspan_reserve(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	int ret;

	mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
	mdsp->mds_base = base;
	mdsp->mds_npgs = npgs;
	if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) {
		free_delspans(mdsp);
	}
	return (ret);
}

static void
delspan_unreserve(pfn_t base, pgcnt_t npgs)
{
	delspan_remove(&reserve_transit, base, npgs);
}
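/*
 * The reservation is taken by kphysm_add_memory_dynamic() before it
 * touches phys_install and is dropped on every exit path (success via the
 * final delspan_unreserve(), failure via kphysm_addmem_error_undospan()),
 * so a delete can never observe a half-added span.
 */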
/*
 * Return whether memseg was created by kphysm_add_memory_dynamic().
 * If this is the case and startp is non-zero, also return the start pfn
 * of the metadata via startp.
 */
static int
memseg_is_dynamic(struct memseg *seg, pfn_t *startp)
{
	pfn_t pt_start;

	if ((seg->msegflags & MEMSEG_DYNAMIC) == 0)
		return (0);

	/* Metadata is required to be at the beginning. */
	ASSERT(hat_getpfnum(kas.a_hat, (caddr_t)seg->epages) < seg->pages_base);

	pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages);
	if (startp != NULL)
		*startp = pt_start;

	return (1);
}
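/*
 * Layout of a dynamic memseg, as established by
 * kphysm_add_memory_dynamic():
 *
 *	pt_base                  pages_base               pages_end
 *	|<------- metapgs ------>|<--------- npgs --------->|
 *	| page_t (and kpm)       | pages described by       |
 *	| metadata               | seg->pages[]             |
 *
 * seg->pages is a kernel VA mapping of the metadata pages, which is why
 * memseg_is_dynamic() can recover pt_base with hat_getpfnum().
 */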
int
kphysm_del_span(
	memhandle_t handle,
	pfn_t base,
	pgcnt_t npgs)
{
	struct mem_handle *mhp;
	struct memseg *seg;
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	pgcnt_t phys_pages, vm_pages;
	pfn_t p_end;
	page_t *pp;
	int ret;

	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	if (mhp->mh_state != MHND_INIT) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	}

	/*
	 * Intersect the span with the installed memory list (phys_install).
	 */
	mdsp_new = span_to_install(base, npgs);
	if (mdsp_new == NULL) {
		/*
		 * No physical memory in this range.  Is this an
		 * error?  If an attempt to start the delete is made
		 * for OK returns from del_span such as this, start will
		 * return an error.
		 * Could return KPHYSM_ENOWORK.
		 */
		/*
		 * It is assumed that there are no error returns
		 * from span_to_install() due to kmem_alloc failure.
		 */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_OK);
	}
	/*
	 * Does this span overlap an existing span?
	 */
	if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) {
		/*
		 * Differentiate between already on the list for this handle
		 * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY).
		 */
		ret = KPHYSM_EBUSY;
		for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
			    base, npgs)) {
				ret = KPHYSM_EDUP;
				break;
			}
		}
		mutex_exit(&mhp->mh_mutex);
		free_delspans(mdsp_new);
		return (ret);
	}
	/*
	 * At this point the spans in mdsp_new have been inserted into the
	 * list of spans for this handle and thereby to the global list of
	 * spans being processed.  Each of these spans must now be checked
	 * for relocatability.  As a side-effect segments in the memseg list
	 * may be split.
	 *
	 * Note that mdsp_new can no longer be used as it is now part of
	 * a larger list.  Select elements of this larger list based
	 * on base and npgs.
	 */
restart:
	phys_pages = 0;
	vm_pages = 0;
	ret = KPHYSM_OK;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		pgcnt_t pages_checked;

		if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) {
			continue;
		}
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		/*
		 * The pages_checked count is a hack.  All pages should be
		 * checked for relocatability.  Those not covered by memsegs
		 * should be tested with arch_kphysm_del_span_ok().
		 */
		pages_checked = 0;
		for (seg = memsegs; seg; seg = seg->next) {
			pfn_t mseg_start;

			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				continue;
			}
			/* Check that segment is suitable for delete. */
			if (memseg_is_dynamic(seg, &mseg_start)) {
				/*
				 * Can only delete whole added segments
				 * for the moment.
				 * Check that this is completely within the
				 * span.
				 */
				if (mseg_start < mdsp->mds_base ||
				    seg->pages_end > p_end) {
					ret = KPHYSM_EBUSY;
					break;
				}
				pages_checked += seg->pages_end - mseg_start;
			} else {
				/*
				 * Set mseg_start for accounting below.
				 */
				mseg_start = seg->pages_base;
				/*
				 * If this segment is larger than the span,
				 * try to split it.  After the split, it
				 * is necessary to restart.
				 */
				if (seg->pages_base < mdsp->mds_base ||
				    seg->pages_end > p_end) {
					pfn_t abase;
					pgcnt_t anpgs;
					int s_ret;

					/* Split required. */
					if (mdsp->mds_base < seg->pages_base)
						abase = seg->pages_base;
					else
						abase = mdsp->mds_base;
					if (p_end > seg->pages_end)
						anpgs = seg->pages_end - abase;
					else
						anpgs = p_end - abase;
					s_ret = kphysm_split_memseg(abase,
					    anpgs);
					if (s_ret == 0) {
						/* Split failed. */
						ret = KPHYSM_ERESOURCE;
						break;
					}
					goto restart;
				}
				pages_checked +=
				    seg->pages_end - seg->pages_base;
			}
			/*
			 * The memseg is wholly within the delete span.
			 * The individual pages can now be checked.
			 */
			/* Cage test. */
			for (pp = seg->pages; pp < seg->epages; pp++) {
				if (PP_ISNORELOC(pp)) {
					ret = KPHYSM_ENONRELOC;
					break;
				}
			}
			if (ret != KPHYSM_OK) {
				break;
			}
			phys_pages += (seg->pages_end - mseg_start);
			vm_pages += MSEG_NPAGES(seg);
		}
		if (ret != KPHYSM_OK)
			break;
		if (pages_checked != mdsp->mds_npgs) {
			ret = KPHYSM_ENONRELOC;
			break;
		}
	}
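	/*
	 * At this point phys_pages counts all physical pages of the spans,
	 * including the page_t metadata of dynamic memsegs (from
	 * mseg_start), while vm_pages counts only the managed pages
	 * (MSEG_NPAGES()).
	 */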
	if (ret == KPHYSM_OK) {
		mhp->mh_phys_pages += phys_pages;
		mhp->mh_vm_pages += vm_pages;
	} else {
		/*
		 * Keep holding the mh_mutex to prevent it going away.
		 */
		delspan_remove(&mhp->mh_transit, base, npgs);
	}
	mutex_exit(&mhp->mh_mutex);
	return (ret);
}

int
kphysm_del_span_query(
	pfn_t base,
	pgcnt_t npgs,
	memquery_t *mqp)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	int done_first_nonreloc;

	mqp->phys_pages = 0;
	mqp->managed = 0;
	mqp->nonrelocatable = 0;
	mqp->first_nonrelocatable = 0;
	mqp->last_nonrelocatable = 0;

	mdsp_new = span_to_install(base, npgs);
	/*
	 * It is OK to proceed here if mdsp_new == NULL.
	 */
	done_first_nonreloc = 0;
	for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) {
		pfn_t sbase;
		pgcnt_t snpgs;

		mqp->phys_pages += mdsp->mds_npgs;
		sbase = mdsp->mds_base;
		snpgs = mdsp->mds_npgs;
		while (snpgs != 0) {
			struct memseg *lseg, *seg;
			pfn_t p_end;
			page_t *pp;
			pfn_t mseg_start;

			p_end = sbase + snpgs;
			/*
			 * Find the lowest addressed memseg that starts
			 * after sbase and account for it.
			 * This is to catch dynamic memsegs whose start
			 * is hidden.
			 */
			seg = NULL;
			for (lseg = memsegs; lseg != NULL; lseg = lseg->next) {
				if ((lseg->pages_base >= sbase) ||
				    (lseg->pages_base < p_end &&
				    lseg->pages_end > sbase)) {
					if (seg == NULL ||
					    seg->pages_base > lseg->pages_base)
						seg = lseg;
				}
			}
			if (seg != NULL) {
				if (!memseg_is_dynamic(seg, &mseg_start)) {
					mseg_start = seg->pages_base;
				}
				/*
				 * Now have the full extent of the memseg so
				 * do the range check.
				 */
				if (mseg_start >= p_end ||
				    seg->pages_end <= sbase) {
					/* Span does not overlap memseg. */
					seg = NULL;
				}
			}
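			/*
			 * Loop invariant: sbase/snpgs always describe the
			 * unprocessed remainder of the delspan.  Each pass
			 * below consumes either a gap before the chosen
			 * memseg, the hidden metadata area of a dynamic
			 * memseg, or the overlap with the memseg itself.
			 */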
1249*0Sstevel@tonic-gate */ 1250*0Sstevel@tonic-gate if (seg == NULL || mseg_start > sbase) { 1251*0Sstevel@tonic-gate pfn_t a_end; 1252*0Sstevel@tonic-gate 1253*0Sstevel@tonic-gate a_end = (seg == NULL) ? p_end : mseg_start; 1254*0Sstevel@tonic-gate /* 1255*0Sstevel@tonic-gate * Check with arch layer for relocatability. 1256*0Sstevel@tonic-gate */ 1257*0Sstevel@tonic-gate if (arch_kphysm_del_span_ok(sbase, 1258*0Sstevel@tonic-gate (a_end - sbase))) { 1259*0Sstevel@tonic-gate /* 1260*0Sstevel@tonic-gate * No non-relocatble pages in this 1261*0Sstevel@tonic-gate * area, avoid the fine-grained 1262*0Sstevel@tonic-gate * test. 1263*0Sstevel@tonic-gate */ 1264*0Sstevel@tonic-gate snpgs -= (a_end - sbase); 1265*0Sstevel@tonic-gate sbase = a_end; 1266*0Sstevel@tonic-gate } 1267*0Sstevel@tonic-gate while (sbase < a_end) { 1268*0Sstevel@tonic-gate if (!arch_kphysm_del_span_ok(sbase, 1269*0Sstevel@tonic-gate 1)) { 1270*0Sstevel@tonic-gate mqp->nonrelocatable++; 1271*0Sstevel@tonic-gate if (!done_first_nonreloc) { 1272*0Sstevel@tonic-gate mqp-> 1273*0Sstevel@tonic-gate first_nonrelocatable 1274*0Sstevel@tonic-gate = sbase; 1275*0Sstevel@tonic-gate done_first_nonreloc = 1; 1276*0Sstevel@tonic-gate } 1277*0Sstevel@tonic-gate mqp->last_nonrelocatable = 1278*0Sstevel@tonic-gate sbase; 1279*0Sstevel@tonic-gate } 1280*0Sstevel@tonic-gate sbase++; 1281*0Sstevel@tonic-gate snpgs--; 1282*0Sstevel@tonic-gate } 1283*0Sstevel@tonic-gate } 1284*0Sstevel@tonic-gate if (seg != NULL) { 1285*0Sstevel@tonic-gate ASSERT(mseg_start <= sbase); 1286*0Sstevel@tonic-gate if (seg->pages_base != mseg_start && 1287*0Sstevel@tonic-gate seg->pages_base > sbase) { 1288*0Sstevel@tonic-gate pgcnt_t skip_pgs; 1289*0Sstevel@tonic-gate 1290*0Sstevel@tonic-gate /* 1291*0Sstevel@tonic-gate * Skip the page_t area of a 1292*0Sstevel@tonic-gate * dynamic memseg. 1293*0Sstevel@tonic-gate */ 1294*0Sstevel@tonic-gate skip_pgs = seg->pages_base - sbase; 1295*0Sstevel@tonic-gate if (snpgs <= skip_pgs) { 1296*0Sstevel@tonic-gate sbase += snpgs; 1297*0Sstevel@tonic-gate snpgs = 0; 1298*0Sstevel@tonic-gate continue; 1299*0Sstevel@tonic-gate } 1300*0Sstevel@tonic-gate snpgs -= skip_pgs; 1301*0Sstevel@tonic-gate sbase += skip_pgs; 1302*0Sstevel@tonic-gate } 1303*0Sstevel@tonic-gate ASSERT(snpgs != 0); 1304*0Sstevel@tonic-gate ASSERT(seg->pages_base <= sbase); 1305*0Sstevel@tonic-gate /* 1306*0Sstevel@tonic-gate * The individual pages can now be checked. 
1307*0Sstevel@tonic-gate */ 1308*0Sstevel@tonic-gate for (pp = seg->pages + 1309*0Sstevel@tonic-gate (sbase - seg->pages_base); 1310*0Sstevel@tonic-gate snpgs != 0 && pp < seg->epages; pp++) { 1311*0Sstevel@tonic-gate mqp->managed++; 1312*0Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 1313*0Sstevel@tonic-gate mqp->nonrelocatable++; 1314*0Sstevel@tonic-gate if (!done_first_nonreloc) { 1315*0Sstevel@tonic-gate mqp-> 1316*0Sstevel@tonic-gate first_nonrelocatable 1317*0Sstevel@tonic-gate = sbase; 1318*0Sstevel@tonic-gate done_first_nonreloc = 1; 1319*0Sstevel@tonic-gate } 1320*0Sstevel@tonic-gate mqp->last_nonrelocatable = 1321*0Sstevel@tonic-gate sbase; 1322*0Sstevel@tonic-gate } 1323*0Sstevel@tonic-gate sbase++; 1324*0Sstevel@tonic-gate snpgs--; 1325*0Sstevel@tonic-gate } 1326*0Sstevel@tonic-gate } 1327*0Sstevel@tonic-gate } 1328*0Sstevel@tonic-gate } 1329*0Sstevel@tonic-gate 1330*0Sstevel@tonic-gate free_delspans(mdsp_new); 1331*0Sstevel@tonic-gate 1332*0Sstevel@tonic-gate return (KPHYSM_OK); 1333*0Sstevel@tonic-gate } 1334*0Sstevel@tonic-gate 1335*0Sstevel@tonic-gate /* 1336*0Sstevel@tonic-gate * This release function can be called at any stage as follows: 1337*0Sstevel@tonic-gate * _gethandle only called 1338*0Sstevel@tonic-gate * _span(s) only called 1339*0Sstevel@tonic-gate * _start called but failed 1340*0Sstevel@tonic-gate * delete thread exited 1341*0Sstevel@tonic-gate */ 1342*0Sstevel@tonic-gate int 1343*0Sstevel@tonic-gate kphysm_del_release(memhandle_t handle) 1344*0Sstevel@tonic-gate { 1345*0Sstevel@tonic-gate struct mem_handle *mhp; 1346*0Sstevel@tonic-gate 1347*0Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 1348*0Sstevel@tonic-gate if (mhp == NULL) { 1349*0Sstevel@tonic-gate return (KPHYSM_EHANDLE); 1350*0Sstevel@tonic-gate } 1351*0Sstevel@tonic-gate switch (mhp->mh_state) { 1352*0Sstevel@tonic-gate case MHND_STARTING: 1353*0Sstevel@tonic-gate case MHND_RUNNING: 1354*0Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 1355*0Sstevel@tonic-gate return (KPHYSM_ENOTFINISHED); 1356*0Sstevel@tonic-gate case MHND_FREE: 1357*0Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 1358*0Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 1359*0Sstevel@tonic-gate return (KPHYSM_EHANDLE); 1360*0Sstevel@tonic-gate case MHND_INIT: 1361*0Sstevel@tonic-gate break; 1362*0Sstevel@tonic-gate case MHND_DONE: 1363*0Sstevel@tonic-gate break; 1364*0Sstevel@tonic-gate case MHND_RELEASE: 1365*0Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 1366*0Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 1367*0Sstevel@tonic-gate default: 1368*0Sstevel@tonic-gate #ifdef DEBUG 1369*0Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d", 1370*0Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 1371*0Sstevel@tonic-gate #endif /* DEBUG */ 1372*0Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 1373*0Sstevel@tonic-gate return (KPHYSM_EHANDLE); 1374*0Sstevel@tonic-gate } 1375*0Sstevel@tonic-gate /* 1376*0Sstevel@tonic-gate * Set state so that we can wait if necessary. 1377*0Sstevel@tonic-gate * Also this means that we have read/write access to all 1378*0Sstevel@tonic-gate * fields except mh_exthandle and mh_state. 1379*0Sstevel@tonic-gate */ 1380*0Sstevel@tonic-gate mhp->mh_state = MHND_RELEASE; 1381*0Sstevel@tonic-gate /* 1382*0Sstevel@tonic-gate * The mem_handle cannot be de-allocated by any other operation 1383*0Sstevel@tonic-gate * now, so no need to hold mh_mutex. 
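
/*
 * Usage sketch (illustrative only; the variable names and decision logic
 * are assumptions about a caller, not part of this file): a DR driver can
 * probe a span before committing to a delete, e.g.
 *
 *	memquery_t mq;
 *
 *	if (kphysm_del_span_query(base, npgs, &mq) == KPHYSM_OK &&
 *	    mq.nonrelocatable != 0) {
 *		cmn_err(CE_NOTE, "!%lu non-relocatable pages in "
 *		    "[0x%lx, 0x%lx]", mq.nonrelocatable,
 *		    mq.first_nonrelocatable, mq.last_nonrelocatable);
 *	}
 *
 * Since no locks are held between the query and any later delete attempt,
 * the answer is advisory: the cage may grow into the span afterwards.
 */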

/*
 * This release function can be called at any stage as follows:
 *	_gethandle only called
 *	_span(s) only called
 *	_start called but failed
 *	delete thread exited
 */
int
kphysm_del_release(memhandle_t handle)
{
	struct mem_handle *mhp;

	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	switch (mhp->mh_state) {
	case MHND_STARTING:
	case MHND_RUNNING:
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ENOTFINISHED);
	case MHND_FREE:
		ASSERT(mhp->mh_state != MHND_FREE);
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	case MHND_INIT:
		break;
	case MHND_DONE:
		break;
	case MHND_RELEASE:
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d",
		    (void *)mhp, mhp->mh_state);
#endif /* DEBUG */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	}
	/*
	 * Set state so that we can wait if necessary.
	 * Also this means that we have read/write access to all
	 * fields except mh_exthandle and mh_state.
	 */
	mhp->mh_state = MHND_RELEASE;
	/*
	 * The mem_handle cannot be de-allocated by any other operation
	 * now, so no need to hold mh_mutex.
	 */
	mutex_exit(&mhp->mh_mutex);

	delspan_remove(&mhp->mh_transit, 0, 0);
	mhp->mh_phys_pages = 0;
	mhp->mh_vm_pages = 0;
	mhp->mh_hold_todo = 0;
	mhp->mh_delete_complete = NULL;
	mhp->mh_delete_complete_arg = NULL;
	mhp->mh_cancel = 0;

	mutex_enter(&mhp->mh_mutex);
	ASSERT(mhp->mh_state == MHND_RELEASE);
	mhp->mh_state = MHND_FREE;

	kphysm_free_mem_handle(mhp);

	return (KPHYSM_OK);
}

/*
 * This cancel function can only be called with the thread running.
 */
int
kphysm_del_cancel(memhandle_t handle)
{
	struct mem_handle *mhp;

	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ENOTRUNNING);
	}
	/*
	 * Set the cancel flag and wake the delete thread up.
	 * The thread may be waiting on I/O, so the effect of the cancel
	 * may be delayed.
	 */
	if (mhp->mh_cancel == 0) {
		mhp->mh_cancel = KPHYSM_ECANCELLED;
		cv_signal(&mhp->mh_cv);
	}
	mutex_exit(&mhp->mh_mutex);
	return (KPHYSM_OK);
}

int
kphysm_del_status(
	memhandle_t handle,
	memdelstat_t *mdstp)
{
	struct mem_handle *mhp;

	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	/*
	 * Calling kphysm_del_status() is allowed before the delete
	 * is started to allow for status display.
	 */
	if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING &&
	    mhp->mh_state != MHND_RUNNING) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ENOTRUNNING);
	}
	mdstp->phys_pages = mhp->mh_phys_pages;
	mdstp->managed = mhp->mh_vm_pages;
	mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo;
	mutex_exit(&mhp->mh_mutex);
	return (KPHYSM_OK);
}
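
/*
 * Usage sketch (illustrative only): a caller that has started a delete
 * can watch progress and give up on a stalled operation, roughly:
 *
 *	memdelstat_t mds;
 *
 *	if (kphysm_del_status(handle, &mds) == KPHYSM_OK &&
 *	    mds.collected == prev_collected) {
 *		(void) kphysm_del_cancel(handle);
 *	}
 *
 * where prev_collected is hypothetical caller state saved from the
 * previous poll.  The cancel takes effect asynchronously; completion,
 * with KPHYSM_ECANCELLED, is still reported through the callback given
 * to kphysm_del_start().
 */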

static int mem_delete_additional_pages = 100;

static int
can_remove_pgs(pgcnt_t npgs)
{
	/*
	 * If all pageable pages were paged out, freemem would
	 * equal availrmem.  There is a minimum requirement for
	 * availrmem.
	 */
	if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages))
	    < npgs)
		return (0);
	/* TODO: check swap space, etc. */
	return (1);
}

static int
get_availrmem(pgcnt_t npgs)
{
	int ret;

	mutex_enter(&freemem_lock);
	ret = can_remove_pgs(npgs);
	if (ret != 0)
		availrmem -= npgs;
	mutex_exit(&freemem_lock);
	return (ret);
}

static void
put_availrmem(pgcnt_t npgs)
{
	mutex_enter(&freemem_lock);
	availrmem += npgs;
	mutex_exit(&freemem_lock);
}

#define	FREEMEM_INCR	100
static pgcnt_t freemem_incr = FREEMEM_INCR;
#define	DEL_FREE_WAIT_FRAC	4
#define	DEL_FREE_WAIT_TICKS	((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC)

#define	DEL_BUSY_WAIT_FRAC	20
#define	DEL_BUSY_WAIT_TICKS	((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC)
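
/*
 * Worked example (illustrative only): with the common hz value of 100,
 * DEL_FREE_WAIT_TICKS is (100+3)/4 = 25 ticks, i.e. roughly a quarter
 * of a second between attempts to take pages from freemem, and
 * DEL_BUSY_WAIT_TICKS is (100+19)/20 = 5 ticks, about a twentieth of a
 * second between rescans when nothing could be collected.  The
 * (hz+FRAC-1) term rounds the division up so the delay never computes
 * to zero ticks.
 */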

static void kphysm_del_cleanup(struct mem_handle *);

static void page_delete_collect(page_t *, struct mem_handle *);

static pgcnt_t
delthr_get_freemem(struct mem_handle *mhp)
{
	pgcnt_t free_get;
	int ret;

	ASSERT(MUTEX_HELD(&mhp->mh_mutex));

	MDSTAT_INCR(mhp, need_free);
	/*
	 * Get up to freemem_incr pages.
	 */
	free_get = freemem_incr;
	if (free_get > mhp->mh_hold_todo)
		free_get = mhp->mh_hold_todo;
	/*
	 * Take free_get pages away from freemem,
	 * waiting if necessary.
	 */

	while (!mhp->mh_cancel) {
		mutex_exit(&mhp->mh_mutex);
		MDSTAT_INCR(mhp, free_loop);
		/*
		 * Duplicate test from page_create_throttle()
		 * but don't override with !PG_WAIT.
		 */
		if (freemem < (free_get + throttlefree)) {
			MDSTAT_INCR(mhp, free_low);
			ret = 0;
		} else {
			ret = page_create_wait(free_get, 0);
			if (ret == 0) {
				/* EMPTY */
				MDSTAT_INCR(mhp, free_failed);
			}
		}
		if (ret != 0) {
			mutex_enter(&mhp->mh_mutex);
			return (free_get);
		}

		/*
		 * Put pressure on pageout.
		 */
		page_needfree(free_get);
		cv_signal(&proc_pageout->p_cv);

		mutex_enter(&mhp->mh_mutex);
		(void) cv_timedwait(&mhp->mh_cv, &mhp->mh_mutex,
		    (lbolt + DEL_FREE_WAIT_TICKS));
		mutex_exit(&mhp->mh_mutex);
		page_needfree(-(spgcnt_t)free_get);

		mutex_enter(&mhp->mh_mutex);
	}
	return (0);
}
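
/*
 * Example of the throttle test above (the numbers are illustrative, not
 * tunable defaults): with freemem_incr = 100 and a throttlefree of, say,
 * 256 pages, delthr_get_freemem() declines to call page_create_wait()
 * whenever freemem < 356.  It instead registers the demand with
 * page_needfree(), prods pageout and retries every DEL_FREE_WAIT_TICKS,
 * so a memory delete never drives freemem below the same floor that
 * page_create_throttle() enforces for ordinary allocations.
 */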

#define	DR_AIO_CLEANUP_DELAY	25000	/* 0.025secs, in usec */
#define	DR_AIO_CLEANUP_MAXLOOPS_NODELAY	100
/*
 * This function is run as a helper thread for delete_memory_thread.
 * It is needed in order to force kaio cleanup, so that pages used in kaio
 * will be unlocked and subsequently relocated by delete_memory_thread.
 * The address of the delete_memory_thread's mem_handle is passed in to
 * this thread function, and is used to set the mh_aio_cleanup_done member
 * prior to calling thread_exit().
 */
static void
dr_aio_cleanup_thread(caddr_t amhp)
{
	proc_t *procp;
	int (*aio_cleanup_dr_delete_memory)(proc_t *);
	int cleaned;
	int n = 0;
	struct mem_handle *mhp;
	volatile uint_t *pcancel;

	mhp = (struct mem_handle *)amhp;
	ASSERT(mhp != NULL);
	pcancel = &mhp->mh_dr_aio_cleanup_cancel;
	if (modload("sys", "kaio") == -1) {
		mhp->mh_aio_cleanup_done = 1;
		cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio");
		thread_exit();
	}
	aio_cleanup_dr_delete_memory = (int (*)(proc_t *))
	    modgetsymvalue("aio_cleanup_dr_delete_memory", 0);
	if (aio_cleanup_dr_delete_memory == NULL) {
		mhp->mh_aio_cleanup_done = 1;
		cmn_err(CE_WARN,
		    "aio_cleanup_dr_delete_memory not found in kaio");
		thread_exit();
	}
	do {
		cleaned = 0;
		mutex_enter(&pidlock);
		for (procp = practive; (*pcancel == 0) && (procp != NULL);
		    procp = procp->p_next) {
			mutex_enter(&procp->p_lock);
			if (procp->p_aio != NULL) {
				/* cleanup proc's outstanding kaio */
				cleaned +=
				    (*aio_cleanup_dr_delete_memory)(procp);
			}
			mutex_exit(&procp->p_lock);
		}
		mutex_exit(&pidlock);
		if ((*pcancel == 0) &&
		    (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) {
			/* delay a bit before retrying all procs again */
			delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
			n = 0;
		}
	} while (*pcancel == 0);
	mhp->mh_aio_cleanup_done = 1;
	thread_exit();
}
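
/*
 * Summary of the helper-thread handshake used below (descriptive comment
 * only, added for clarity): delete_memory_thread clears
 * mh_dr_aio_cleanup_cancel and mh_aio_cleanup_done before creating
 * dr_aio_cleanup_thread; the helper loops until it sees the cancel flag
 * go non-zero, then sets mh_aio_cleanup_done and exits; and
 * delete_memory_thread waits (with a CPR-safe delay) on
 * mh_aio_cleanup_done before tearing down the state they share.
 */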

static void
delete_memory_thread(caddr_t amhp)
{
	struct mem_handle *mhp;
	struct memdelspan *mdsp;
	callb_cpr_t cprinfo;
	page_t *pp_targ;
	spgcnt_t freemem_left;
	void (*del_complete_funcp)(void *, int error);
	void *del_complete_arg;
	int comp_code;
	int ret;
	int first_scan;
	uint_t szc;
#ifdef MEM_DEL_STATS
	uint64_t start_total, ntick_total;
	uint64_t start_pgrp, ntick_pgrp;
#endif /* MEM_DEL_STATS */

	mhp = (struct mem_handle *)amhp;

#ifdef MEM_DEL_STATS
	start_total = ddi_get_lbolt();
#endif /* MEM_DEL_STATS */

	CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex,
	    callb_generic_cpr, "memdel");

	mutex_enter(&mhp->mh_mutex);
	ASSERT(mhp->mh_state == MHND_STARTING);

	mhp->mh_state = MHND_RUNNING;
	mhp->mh_thread_id = curthread;

	mhp->mh_hold_todo = mhp->mh_vm_pages;
	mutex_exit(&mhp->mh_mutex);

	/* Allocate the remap pages now, if necessary. */
	memseg_remap_init();

	/*
	 * Subtract from availrmem now if possible as availrmem
	 * may not be available by the end of the delete.
	 */
	if (!get_availrmem(mhp->mh_vm_pages)) {
		comp_code = KPHYSM_ENOTVIABLE;
		mutex_enter(&mhp->mh_mutex);
		goto early_exit;
	}

	ret = kphysm_setup_pre_del(mhp->mh_vm_pages);

	mutex_enter(&mhp->mh_mutex);

	if (ret != 0) {
		mhp->mh_cancel = KPHYSM_EREFUSED;
		goto refused;
	}

	transit_list_collect(mhp, 1);

	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		ASSERT(mdsp->mds_bitmap == NULL);
		mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP);
		mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp),
		    KM_SLEEP);
	}

	first_scan = 1;
	freemem_left = 0;
	/*
	 * Start dr_aio_cleanup_thread, which periodically iterates
	 * through the process list and invokes aio cleanup.  This
	 * is needed in order to avoid a deadly embrace between the
	 * delete_memory_thread (waiting on writer lock for page, with the
	 * exclusive-wanted bit set), kaio read request threads (waiting for a
	 * reader lock on the same page that is wanted by the
	 * delete_memory_thread), and threads waiting for kaio completion
	 * (blocked on spt_amp->lock).
	 */
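	/*
	 * Note on the bitmap arithmetic used throughout the scan loop
	 * (explanatory comment; NBPBMW is the number of bits per bitmap
	 * word): the page at pfn maps to bit (pfn - mds_base), kept in
	 * word mds_bitmap[bit / NBPBMW] at position (bit % NBPBMW).
	 * For example, with 32-bit bitmap words the page at offset 70
	 * within a span lives at bit 70 % 32 = 6 of word 70 / 32 = 2.
	 * A set bit means the page has been collected or needs no
	 * collection; mds_bitmap_retired marks the subset accounted as
	 * retired, which must be hashed out or downgraded when the
	 * delete finishes.
	 */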
	mhp->mh_dr_aio_cleanup_cancel = 0;
	mhp->mh_aio_cleanup_done = 0;
	(void) thread_create(NULL, 0, dr_aio_cleanup_thread,
	    (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1);
	while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) {
		pgcnt_t collected;

		MDSTAT_INCR(mhp, nloop);
		collected = 0;
		for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) &&
		    (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) {
			pfn_t pfn, p_end;

			if (first_scan) {
				mem_node_pre_del_slice(mdsp->mds_base,
				    mdsp->mds_base + mdsp->mds_npgs - 1);
			}

			p_end = mdsp->mds_base + mdsp->mds_npgs;
			for (pfn = mdsp->mds_base; (pfn < p_end) &&
			    (mhp->mh_cancel == 0); pfn++) {
				page_t *pp, *tpp, *tpp_targ;
				pgcnt_t bit;
				struct vnode *vp;
				u_offset_t offset;
				int mod, result;
				spgcnt_t pgcnt;

				bit = pfn - mdsp->mds_base;
				if ((mdsp->mds_bitmap[bit / NBPBMW] &
				    (1 << (bit % NBPBMW))) != 0) {
					MDSTAT_INCR(mhp, already_done);
					continue;
				}
				if (freemem_left == 0) {
					freemem_left += delthr_get_freemem(mhp);
					if (freemem_left == 0)
						break;
				}

				/*
				 * Release mh_mutex - some of this
				 * stuff takes some time (e.g. PUTPAGE).
				 */

				mutex_exit(&mhp->mh_mutex);
				MDSTAT_INCR(mhp, ncheck);

				pp = page_numtopp_nolock(pfn);
				if (pp == NULL) {
					/*
					 * Not covered by a page_t - will
					 * be dealt with elsewhere.
					 */
					MDSTAT_INCR(mhp, nopaget);
					mutex_enter(&mhp->mh_mutex);
					mdsp->mds_bitmap[bit / NBPBMW] |=
					    (1 << (bit % NBPBMW));
					continue;
				}

				if (!page_try_reclaim_lock(pp, SE_EXCL,
				    SE_EXCL_WANTED)) {
					if (page_isretired(pp)) {
						/*
						 * Page has been retired.
						 *
						 * Its shared lock can and
						 * must be upgraded to an
						 * exclusive lock in order
						 * to hashout the page when
						 * the delete completes.
						 */
						page_lock_clr_exclwanted(pp);
						if (!page_tryupgrade(pp)) {
							mutex_enter(
							    &mhp->mh_mutex);
							continue;
						}
					} else {
						/*
						 * Page in use elsewhere.
						 */
						MDSTAT_INCR(mhp, lockfail);
						mutex_enter(&mhp->mh_mutex);
						continue;
					}
				}
				/*
				 * See if the cage expanded into the delete.
				 * This can happen as we have to allow the
				 * cage to expand.
				 */
				if (PP_ISNORELOC(pp)) {
					if (page_isretired(pp))
						page_downgrade(pp);
					else
						page_unlock(pp);
					mutex_enter(&mhp->mh_mutex);
					mhp->mh_cancel = KPHYSM_ENONRELOC;
					break;
				}
				if (page_isretired(pp)) {
					/*
					 * Page has been retired and is
					 * not part of the cage so we
					 * can now do the accounting for
					 * it.
					 */
					MDSTAT_INCR(mhp, retired);
					mutex_enter(&mhp->mh_mutex);
					mdsp->mds_bitmap[bit / NBPBMW]
					    |= (1 << (bit % NBPBMW));
					mdsp->mds_bitmap_retired[bit /
					    NBPBMW] |=
					    (1 << (bit % NBPBMW));
					mhp->mh_hold_todo--;
					continue;
				}
				ASSERT(freemem_left != 0);
				if (PP_ISFREE(pp)) {
					/*
					 * Like page_reclaim() only 'freemem'
					 * processing is already done.
					 */
					MDSTAT_INCR(mhp, nfree);
free_page_collect:
					if (PP_ISAGED(pp)) {
						page_list_sub(pp,
						    PG_FREE_LIST);
					} else {
						page_list_sub(pp,
						    PG_CACHE_LIST);
					}
					PP_CLRFREE(pp);
					PP_CLRAGED(pp);
					collected++;
					mutex_enter(&mhp->mh_mutex);
					page_delete_collect(pp, mhp);
					mdsp->mds_bitmap[bit / NBPBMW] |=
					    (1 << (bit % NBPBMW));
					freemem_left--;
					continue;
				}
				ASSERT(pp->p_vnode != NULL);
				if (first_scan) {
					MDSTAT_INCR(mhp, first_notfree);
					page_unlock(pp);
					mutex_enter(&mhp->mh_mutex);
					continue;
				}
				/*
				 * Keep stats on pages encountered that
				 * are toxic or failing but not retired.
				 */
				if (page_istoxic(pp)) {
					MDSTAT_INCR(mhp, toxic);
				} else if (page_isfailing(pp)) {
					MDSTAT_INCR(mhp, failing);
				}
				/*
				 * In certain cases below, special exceptions
				 * are made for pages that are toxic.  This
				 * is because the current meaning of toxic
				 * is that an uncorrectable error has been
				 * previously associated with the page.
				 */
				if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
					if (!page_istoxic(pp)) {
						/*
						 * Must relocate locked in
						 * memory pages.
						 */
#ifdef MEM_DEL_STATS
						start_pgrp = ddi_get_lbolt();
#endif /* MEM_DEL_STATS */
						/*
						 * Lock all constituent pages
						 * of a large page to ensure
						 * that p_szc won't change.
						 */
						if (!group_page_trylock(pp,
						    SE_EXCL)) {
							MDSTAT_INCR(mhp,
							    gptllckfail);
							page_unlock(pp);
							mutex_enter(
							    &mhp->mh_mutex);
							continue;
						}
						MDSTAT_INCR(mhp, npplocked);
						pp_targ =
						    page_get_replacement_page(
						    pp, NULL, 0);
						if (pp_targ != NULL) {
#ifdef MEM_DEL_STATS
							ntick_pgrp =
							    (uint64_t)
							    ddi_get_lbolt() -
							    start_pgrp;
#endif /* MEM_DEL_STATS */
							MDSTAT_PGRP(mhp,
							    ntick_pgrp);
							MDSTAT_INCR(mhp,
							    nlockreloc);
							goto reloc;
						}
						group_page_unlock(pp);
						page_unlock(pp);
#ifdef MEM_DEL_STATS
						ntick_pgrp =
						    (uint64_t)ddi_get_lbolt() -
						    start_pgrp;
#endif /* MEM_DEL_STATS */
						MDSTAT_PGRP(mhp, ntick_pgrp);
						MDSTAT_INCR(mhp, nnorepl);
						mutex_enter(&mhp->mh_mutex);
						continue;
					} else {
						/*
						 * Cannot do anything about
						 * this page because it is
						 * toxic.
						 */
						MDSTAT_INCR(mhp, npplkdtoxic);
						page_unlock(pp);
						mutex_enter(&mhp->mh_mutex);
						continue;
					}
				}
				/*
				 * Unload the mappings and check if mod bit
				 * is set.
				 */
				ASSERT(pp->p_vnode != &kvp);
				(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
				mod = hat_ismod(pp);

#ifdef MEM_DEL_STATS
				start_pgrp = ddi_get_lbolt();
#endif /* MEM_DEL_STATS */
				if (mod && !page_istoxic(pp)) {
					/*
					 * Lock all constituent pages
					 * of a large page to ensure
					 * that p_szc won't change.
					 */
					if (!group_page_trylock(pp, SE_EXCL)) {
						MDSTAT_INCR(mhp, gptlmodfail);
						page_unlock(pp);
						mutex_enter(&mhp->mh_mutex);
						continue;
					}
					pp_targ = page_get_replacement_page(pp,
					    NULL, 0);
					if (pp_targ != NULL) {
						MDSTAT_INCR(mhp, nmodreloc);
#ifdef MEM_DEL_STATS
						ntick_pgrp =
						    (uint64_t)ddi_get_lbolt() -
						    start_pgrp;
#endif /* MEM_DEL_STATS */
						MDSTAT_PGRP(mhp, ntick_pgrp);
						goto reloc;
					}
					group_page_unlock(pp);
				}

				if (!page_try_demote_pages(pp)) {
					MDSTAT_INCR(mhp, demotefail);
					page_unlock(pp);
#ifdef MEM_DEL_STATS
					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
					    start_pgrp;
#endif /* MEM_DEL_STATS */
					MDSTAT_PGRP(mhp, ntick_pgrp);
					mutex_enter(&mhp->mh_mutex);
					continue;
				}

				/*
				 * Regular 'page-out'.
				 */
				if (!mod) {
					MDSTAT_INCR(mhp, ndestroy);
					page_destroy(pp, 1);
					/*
					 * page_destroy was called with
					 * dontfree.  As long as p_lckcnt
					 * and p_cowcnt are both zero, the
					 * only additional action of
					 * page_destroy with !dontfree is to
					 * call page_free, so we can collect
					 * the page here.
					 */
					collected++;
#ifdef MEM_DEL_STATS
					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
					    start_pgrp;
#endif /* MEM_DEL_STATS */
					MDSTAT_PGRP(mhp, ntick_pgrp);
					mutex_enter(&mhp->mh_mutex);
					page_delete_collect(pp, mhp);
					mdsp->mds_bitmap[bit / NBPBMW] |=
					    (1 << (bit % NBPBMW));
					continue;
				}
				/*
				 * The page is toxic and the mod bit is
				 * set; we cannot do anything here to deal
				 * with it.
				 */
				if (page_istoxic(pp)) {
					page_unlock(pp);
#ifdef MEM_DEL_STATS
					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
					    start_pgrp;
#endif /* MEM_DEL_STATS */
					MDSTAT_PGRP(mhp, ntick_pgrp);
					MDSTAT_INCR(mhp, modtoxic);
					mutex_enter(&mhp->mh_mutex);
					continue;
				}
				MDSTAT_INCR(mhp, nputpage);
				vp = pp->p_vnode;
				offset = pp->p_offset;
				VN_HOLD(vp);
				page_unlock(pp);
				(void) VOP_PUTPAGE(vp, offset, PAGESIZE,
				    B_INVAL|B_FORCE, kcred);
				VN_RELE(vp);
#ifdef MEM_DEL_STATS
				ntick_pgrp = (uint64_t)ddi_get_lbolt() -
				    start_pgrp;
#endif /* MEM_DEL_STATS */
				MDSTAT_PGRP(mhp, ntick_pgrp);
				/*
				 * Try to get the page back immediately
				 * so that it can be collected.
				 */
				pp = page_numtopp_nolock(pfn);
				if (pp == NULL) {
					MDSTAT_INCR(mhp, nnoreclaim);
					/*
					 * This should not happen as this
					 * thread is deleting the page.
					 * If this code is ever generalized,
					 * it could become a reality.
					 */
#ifdef DEBUG
					cmn_err(CE_WARN,
					    "delete_memory_thread(0x%p) "
					    "pfn 0x%lx has no page_t",
					    (void *)mhp, pfn);
#endif /* DEBUG */
					mutex_enter(&mhp->mh_mutex);
					continue;
				}
				if (page_try_reclaim_lock(pp, SE_EXCL,
				    SE_EXCL_WANTED)) {
					if (PP_ISFREE(pp)) {
						goto free_page_collect;
					}
					page_unlock(pp);
				}
				MDSTAT_INCR(mhp, nnoreclaim);
				mutex_enter(&mhp->mh_mutex);
				continue;

reloc:
				/*
				 * Got some freemem and a target
				 * page, so move the data to avoid
				 * I/O and lock problems.
				 */
				ASSERT(!page_iolock_assert(pp));
				MDSTAT_INCR(mhp, nreloc);
				/*
				 * page_relocate() will return pgcnt: the
				 * number of consecutive pages relocated.
				 * If it is successful, pp will be a
				 * linked list of the page structs that
				 * were relocated.  If page_relocate() is
				 * unsuccessful, pp will be unmodified.
				 */
#ifdef MEM_DEL_STATS
				start_pgrp = ddi_get_lbolt();
#endif /* MEM_DEL_STATS */
				result = page_relocate(&pp, &pp_targ, 0, 0,
				    &pgcnt, NULL);
#ifdef MEM_DEL_STATS
				ntick_pgrp = (uint64_t)ddi_get_lbolt() -
				    start_pgrp;
#endif /* MEM_DEL_STATS */
				MDSTAT_PGRP(mhp, ntick_pgrp);
				if (result != 0) {
					MDSTAT_INCR(mhp, nrelocfail);
					/*
					 * We did not succeed, so we need
					 * to give the pp_targ pages back,
					 * i.e. page_free(pp_targ, 1) without
					 * the freemem accounting.
					 */
					group_page_unlock(pp);
					page_free_replacement_page(pp_targ);
					page_unlock(pp);
					mutex_enter(&mhp->mh_mutex);
					continue;
				}

				/*
				 * We will then collect pgcnt pages.
				 */
				ASSERT(pgcnt > 0);
				mutex_enter(&mhp->mh_mutex);
				/*
				 * We need to make sure freemem_left is
				 * large enough.
				 */
				while ((freemem_left < pgcnt) &&
				    (!mhp->mh_cancel)) {
					freemem_left +=
					    delthr_get_freemem(mhp);
				}

				/*
				 * Do not proceed if mh_cancel is set.
				 */
				if (mhp->mh_cancel) {
					while (pp_targ != NULL) {
						/*
						 * Unlink and unlock each page.
						 */
						tpp_targ = pp_targ;
						page_sub(&pp_targ, tpp_targ);
						page_unlock(tpp_targ);
					}
					/*
					 * We need to give the pp pages back,
					 * i.e. page_free(pp, 1) without the
					 * freemem accounting.
					 */
					page_free_replacement_page(pp);
					break;
				}

				/* Now remove pgcnt from freemem_left */
				freemem_left -= pgcnt;
				ASSERT(freemem_left >= 0);
				szc = pp->p_szc;
				while (pp != NULL) {
					/*
					 * pp and pp_targ were passed back as
					 * a linked list of pages.
					 * Unlink and unlock each page.
					 */
					tpp_targ = pp_targ;
					page_sub(&pp_targ, tpp_targ);
					page_unlock(tpp_targ);
					/*
					 * The original page is now free
					 * so remove it from the linked
					 * list and collect it.
					 */
					tpp = pp;
					page_sub(&pp, tpp);
					pfn = page_pptonum(tpp);
					collected++;
					ASSERT(PAGE_EXCL(tpp));
					ASSERT(tpp->p_vnode == NULL);
					ASSERT(!hat_page_is_mapped(tpp));
					ASSERT(tpp->p_szc == szc);
					tpp->p_szc = 0;
					page_delete_collect(tpp, mhp);
					bit = pfn - mdsp->mds_base;
					mdsp->mds_bitmap[bit / NBPBMW] |=
					    (1 << (bit % NBPBMW));
				}
				ASSERT(pp_targ == NULL);
			}
		}
		first_scan = 0;
		if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) &&
		    (collected == 0)) {
			/*
			 * This code is needed as we cannot wait
			 * for a page to be locked OR the delete to
			 * be cancelled.  Also, we must delay so
			 * that other threads get a chance to run
			 * on our cpu, otherwise page locks may be
			 * held indefinitely by those threads.
			 */
			MDSTAT_INCR(mhp, ndelay);
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&mhp->mh_cv, &mhp->mh_mutex,
			    (lbolt + DEL_BUSY_WAIT_TICKS));
			CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
		}
	}
	/* stop the dr aio cleanup thread */
	mhp->mh_dr_aio_cleanup_cancel = 1;
	transit_list_collect(mhp, 0);
	if (freemem_left != 0) {
		/* Return any surplus. */
		page_create_putback(freemem_left);
		freemem_left = 0;
	}
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		mem_node_post_del_slice(mdsp->mds_base,
		    mdsp->mds_base + mdsp->mds_npgs - 1,
		    (mhp->mh_cancel != 0));
	}
#ifdef MEM_DEL_STATS
	ntick_total = (uint64_t)ddi_get_lbolt() - start_total;
#endif /* MEM_DEL_STATS */
	MDSTAT_TOTAL(mhp, ntick_total);
	MDSTAT_PRINT(mhp);

	/*
	 * If the memory delete was cancelled, exclusive-wanted bits must
	 * be cleared, and any retired pages that were accounted for above
	 * must have their exclusive lock downgraded to a shared lock to
	 * return them to their previous state.
	 * Otherwise, if the memory delete has completed, retired pages
	 * must be hashed out.
	 */
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		pfn_t pfn, p_end;

		p_end = mdsp->mds_base + mdsp->mds_npgs;
		for (pfn = mdsp->mds_base; pfn < p_end; pfn++) {
			page_t *pp;
			pgcnt_t bit;

			bit = pfn - mdsp->mds_base;
			if (mhp->mh_cancel) {
				pp = page_numtopp_nolock(pfn);
				if (pp != NULL) {
					if ((mdsp->mds_bitmap[bit / NBPBMW] &
					    (1 << (bit % NBPBMW))) == 0) {
						page_lock_clr_exclwanted(pp);
					}
				}
			} else {
				pp = NULL;
			}
			if ((mdsp->mds_bitmap_retired[bit / NBPBMW] &
			    (1 << (bit % NBPBMW))) != 0) {
				/* Do we already have pp? */
				if (pp == NULL) {
					pp = page_numtopp_nolock(pfn);
				}
				ASSERT(pp != NULL);
				ASSERT(page_isretired(pp));
				if (mhp->mh_cancel != 0) {
					page_downgrade(pp);
					/*
					 * To satisfy ASSERT below in
					 * cancel code.
					 */
					mhp->mh_hold_todo++;
				} else {
					page_hashout(pp, (kmutex_t *)NULL);
				}
			}
		}
	}
	/*
	 * Free retired page bitmap and collected page bitmap
	 */
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		ASSERT(mdsp->mds_bitmap_retired != NULL);
		kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp));
		mdsp->mds_bitmap_retired = NULL;	/* Paranoia. */
		ASSERT(mdsp->mds_bitmap != NULL);
		kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp));
		mdsp->mds_bitmap = NULL;	/* Paranoia. */
	}

	/* wait for our dr aio cancel thread to exit */
	while (!(mhp->mh_aio_cleanup_done)) {
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
		CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
	}
refused:
	if (mhp->mh_cancel != 0) {
		page_t *pp;

		comp_code = mhp->mh_cancel;
		/*
		 * Go through list of deleted pages (mh_deleted) freeing
		 * them.
		 */
		while ((pp = mhp->mh_deleted) != NULL) {
			mhp->mh_deleted = pp->p_next;
			mhp->mh_hold_todo++;
			mutex_exit(&mhp->mh_mutex);
			/* Restore p_next. */
			pp->p_next = pp->p_prev;
			if (PP_ISFREE(pp)) {
				cmn_err(CE_PANIC,
				    "page %p is free",
				    (void *)pp);
			}
			page_free(pp, 1);
			mutex_enter(&mhp->mh_mutex);
		}
		ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages);

		mutex_exit(&mhp->mh_mutex);
		put_availrmem(mhp->mh_vm_pages);
		mutex_enter(&mhp->mh_mutex);

		goto t_exit;
	}

	/*
	 * All the pages are no longer in use and are exclusively locked.
	 */

	mhp->mh_deleted = NULL;

	kphysm_del_cleanup(mhp);

	comp_code = KPHYSM_OK;

t_exit:
	mutex_exit(&mhp->mh_mutex);
	kphysm_setup_post_del(mhp->mh_vm_pages,
	    (comp_code == KPHYSM_OK) ? 0 : 1);
	mutex_enter(&mhp->mh_mutex);

early_exit:
	/* mhp->mh_mutex exited by CALLB_CPR_EXIT() */
	mhp->mh_state = MHND_DONE;
	del_complete_funcp = mhp->mh_delete_complete;
	del_complete_arg = mhp->mh_delete_complete_arg;
	CALLB_CPR_EXIT(&cprinfo);
	(*del_complete_funcp)(del_complete_arg, comp_code);
	thread_exit();
	/*NOTREACHED*/
}

/*
 * Start the delete of the memory from the system.
 */
int
kphysm_del_start(
	memhandle_t handle,
	void (*complete)(void *, int),
	void *complete_arg)
{
	struct mem_handle *mhp;

	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	switch (mhp->mh_state) {
	case MHND_FREE:
		ASSERT(mhp->mh_state != MHND_FREE);
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	case MHND_INIT:
		break;
	case MHND_STARTING:
	case MHND_RUNNING:
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	case MHND_DONE:
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	case MHND_RELEASE:
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d",
		    (void *)mhp, mhp->mh_state);
#endif /* DEBUG */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	}

	if (mhp->mh_transit.trl_spans == NULL) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ENOWORK);
	}

	ASSERT(complete != NULL);
	mhp->mh_delete_complete = complete;
	mhp->mh_delete_complete_arg = complete_arg;
	mhp->mh_state = MHND_STARTING;
	/*
	 * Release the mutex in case thread_create sleeps.
	 */
	mutex_exit(&mhp->mh_mutex);

	/*
	 * The "obvious" process for this thread is pageout (proc_pageout)
	 * but this gives the thread too much power over freemem
	 * which results in freemem starvation.
	 */
	(void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0,
	    TS_RUN, maxclsyspri - 1);

	return (KPHYSM_OK);
}
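
/*
 * Usage sketch (illustrative only; the callback and the waiting scheme
 * are assumptions about a caller, not part of this file):
 *
 *	static void
 *	my_del_done(void *arg, int error_code)
 *	{
 *		struct my_waiter *wp = arg;	// hypothetical caller state
 *
 *		wp->done_code = error_code;
 *		// ... wake up the waiting thread ...
 *	}
 *
 * A caller would typically obtain a handle with kphysm_del_gethandle(),
 * add one or more spans with kphysm_del_span(), then call
 * kphysm_del_start(handle, my_del_done, wp) and wait for the callback,
 * which receives KPHYSM_OK on success or the cancel code (e.g.
 * KPHYSM_ECANCELLED) otherwise.  Whatever the outcome,
 * kphysm_del_release() is what finally frees the handle.
 */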
static void
memseg_remap_to_dummy(caddr_t pp, pgcnt_t metapgs)
{
	ASSERT(pp_dummy != NULL);

	while (metapgs != 0) {
		pgcnt_t n;
		int i;

		n = pp_dummy_npages;
		if (n > metapgs)
			n = metapgs;
		for (i = 0; i < n; i++) {
			hat_devload(kas.a_hat, pp, ptob(1), pp_dummy_pfn[i],
			    PROT_READ,
			    HAT_LOAD | HAT_LOAD_NOCONSIST |
			    HAT_LOAD_REMAP);
			pp += ptob(1);
		}
		metapgs -= n;
	}
}
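/*
 * Note that the remapping above cycles through the dummy frames: the
 * k'th metadata page is backed by pp_dummy_pfn[k % pp_dummy_npages],
 * so a metadata area of any size can be retired onto the fixed-size
 * dummy area, all of it read-only.
 */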
/*
 * Transition all the deleted pages to the deleted state so that
 * page_lock will not wait. The page_lock_delete call will
 * also wake up any waiters.
 */
static void
memseg_lock_delete_all(struct memseg *seg)
{
	page_t *pp;

	for (pp = seg->pages; pp < seg->epages; pp++) {
		pp->p_pagenum = PFN_INVALID;	/* XXXX */
		page_lock_delete(pp);
	}
}

static void
kphysm_del_cleanup(struct mem_handle *mhp)
{
	struct memdelspan *mdsp;
	struct memseg *seg;
	struct memseg **segpp;
	struct memseg *seglist;
	pfn_t p_end;
	uint64_t avmem;
	pgcnt_t avpgs;
	pgcnt_t npgs;

	avpgs = mhp->mh_vm_pages;

	memsegs_lock(1);

	/*
	 * Remove the deleted memsegs from the main segment list.
	 */
	npgs = 0;
	seglist = NULL;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		for (segpp = &memsegs; (seg = *segpp) != NULL; ) {
			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				segpp = &((*segpp)->next);
				continue;
			}
			ASSERT(seg->pages_base >= mdsp->mds_base);
			ASSERT(seg->pages_end <= p_end);

			/* Hide the memseg from future scans. */
			hat_kpm_delmem_mseg_update(seg, segpp);
			*segpp = seg->next;
			membar_producer();	/* TODO: Needed? */
			npgs += MSEG_NPAGES(seg);

			/*
			 * Leave the deleted segment's next pointer intact
			 * in case a memsegs scanning loop is walking this
			 * segment concurrently.
			 */
			seg->lnext = seglist;
			seglist = seg;
		}
	}

	build_pfn_hash();

	ASSERT(npgs < total_pages);
	total_pages -= npgs;

	/*
	 * Recalculate the paging parameters now that total_pages has
	 * changed. This will also cause the clock hands to be reset
	 * before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	mutex_exit(&mhp->mh_mutex);
	while ((seg = seglist) != NULL) {
		pfn_t mseg_start;
		pfn_t mseg_base, mseg_end;
		pgcnt_t mseg_npgs;
		page_t *pp;
		pgcnt_t metapgs;
		int dynamic;
		int mlret;

		seglist = seg->lnext;

		/*
		 * Put the page_t's into the deleted state to stop
		 * cv_wait()s on the pages. When we remap, the dummy
		 * page_t's will be in the same state.
		 */
		memseg_lock_delete_all(seg);
		/*
		 * Collect up information based on pages_base and pages_end
		 * early so that we can flag early that the memseg has been
		 * deleted by setting pages_end == pages_base.
		 */
		mseg_base = seg->pages_base;
		mseg_end = seg->pages_end;
		mseg_npgs = MSEG_NPAGES(seg);
		dynamic = memseg_is_dynamic(seg, &mseg_start);

		seg->pages_end = seg->pages_base;

		if (dynamic) {
			pp = seg->pages;
			metapgs = mseg_base - mseg_start;
			ASSERT(metapgs != 0);

			/* Remap the meta data to our special dummy area. */
			memseg_remap_to_dummy((caddr_t)pp, metapgs);

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_va_avail;
			memseg_va_avail = seg;
			mutex_exit(&memseg_lists_lock);
		} else {
			/*
			 * Set for clean-up below.
			 */
			mseg_start = seg->pages_base;
			/*
			 * For memory whose page_ts were allocated
			 * at boot, we need to find a new use for
			 * the page_t memory.
			 * For the moment, just leak it.
			 * (It is held in the memseg_delete_junk list.)
			 */

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_delete_junk;
			memseg_delete_junk = seg;
			mutex_exit(&memseg_lists_lock);
		}

		/* Must not use seg now as it could be re-used. */

		memlist_write_lock();

		mlret = memlist_delete_span(
		    (uint64_t)(mseg_base) << PAGESHIFT,
		    (uint64_t)(mseg_npgs) << PAGESHIFT,
		    &phys_avail);
		ASSERT(mlret == MEML_SPANOP_OK);

		mlret = memlist_delete_span(
		    (uint64_t)(mseg_start) << PAGESHIFT,
		    (uint64_t)(mseg_end - mseg_start) << PAGESHIFT,
		    &phys_install);
		ASSERT(mlret == MEML_SPANOP_OK);
		phys_install_has_changed();

		memlist_write_unlock();
	}

	memlist_read_lock();
	installed_top_size(phys_install, &physmax, &physinstalled);
	memlist_read_unlock();

	mutex_enter(&freemem_lock);
	maxmem -= avpgs;
	physmem -= avpgs;
	/* availrmem is adjusted during the delete. */
	availrmem_initial -= avpgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_delete: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems.
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/* Successfully deleted system memory. */
	mutex_enter(&mhp->mh_mutex);
}
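/*
 * The ordering in kphysm_del_cleanup() matters: each memseg is first
 * unhooked from the memsegs list and the pfn hash is rebuilt (so no
 * new lookups can find it), then its page_t's are pushed to the
 * deleted state, then its metadata mapping is switched to the dummy
 * pages, and only then are the spans removed from phys_avail and
 * phys_install.  A scan that raced with the unhook can therefore
 * still walk the old structures safely until it next consults the
 * list or the hash.
 */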
static uint_t mdel_nullvp_waiter;

static void
page_delete_collect(
	page_t *pp,
	struct mem_handle *mhp)
{
	if (pp->p_vnode) {
		page_hashout(pp, (kmutex_t *)NULL);
		/* do not do PP_SETAGED(pp); */
	} else {
		kmutex_t *sep;

		sep = page_se_mutex(pp);
		mutex_enter(sep);
		if (CV_HAS_WAITERS(&pp->p_cv)) {
			mdel_nullvp_waiter++;
			cv_broadcast(&pp->p_cv);
		}
		mutex_exit(sep);
	}
	ASSERT(pp->p_next == pp->p_prev);
	ASSERT(pp->p_next == NULL || pp->p_next == pp);
	pp->p_next = mhp->mh_deleted;
	mhp->mh_deleted = pp;
	ASSERT(mhp->mh_hold_todo != 0);
	mhp->mh_hold_todo--;
}
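/*
 * The two ASSERTs above encode the precondition that pp is loose: a
 * page on no list has p_next == p_prev, either both NULL or both
 * pointing back at pp itself.  Collected pages are then threaded LIFO
 * through p_next onto mh_deleted, and mh_hold_todo counts down toward
 * the point where every page in the span has been captured.
 */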
static void
transit_list_collect(struct mem_handle *mhp, int v)
{
	struct transit_list_head *trh;

	trh = &transit_list_head;
	mutex_enter(&trh->trh_lock);
	mhp->mh_transit.trl_collect = v;
	mutex_exit(&trh->trh_lock);
}

static void
transit_list_insert(struct transit_list *tlp)
{
	struct transit_list_head *trh;

	trh = &transit_list_head;
	ASSERT(MUTEX_HELD(&trh->trh_lock));
	tlp->trl_next = trh->trh_head;
	trh->trh_head = tlp;
}

static void
transit_list_remove(struct transit_list *tlp)
{
	struct transit_list_head *trh;
	struct transit_list **tlpp;

	trh = &transit_list_head;
	tlpp = &trh->trh_head;
	ASSERT(MUTEX_HELD(&trh->trh_lock));
	while (*tlpp != NULL && *tlpp != tlp)
		tlpp = &(*tlpp)->trl_next;
	ASSERT(*tlpp != NULL);
	if (*tlpp == tlp)
		*tlpp = tlp->trl_next;
	tlp->trl_next = NULL;
}

static struct transit_list *
pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum)
{
	struct transit_list *tlp;

	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			if (pfnum >= mdsp->mds_base &&
			    pfnum < (mdsp->mds_base + mdsp->mds_npgs)) {
				return (tlp);
			}
		}
	}
	return (NULL);
}

int
pfn_is_being_deleted(pfn_t pfnum)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;
	if (trh->trh_head == NULL)
		return (0);

	mutex_enter(&trh->trh_lock);
	tlp = pfnum_to_transit_list(trh, pfnum);
	ret = (tlp != NULL && tlp->trl_collect);
	mutex_exit(&trh->trh_lock);

	return (ret);
}
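/*
 * Illustrative caller (hypothetical, not from this file): a physical
 * memory scanner can use this predicate to avoid touching page_t's
 * whose metadata mappings may be switched to the dummy area
 * underneath it, e.g.
 *
 *	if (!pfn_is_being_deleted(pfn)) {
 *		pp = page_numtopp_nolock(pfn);
 *		... inspect pp ...
 *	}
 *
 * The unlocked trh_head check above keeps the common no-delete-active
 * case cheap; the lock is only taken while a delete is in transit.
 */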
#ifdef MEM_DEL_STATS
extern int hz;
static void
mem_del_stat_print_func(struct mem_handle *mhp)
{
	uint64_t tmp;

	if (mem_del_stat_print) {
		printf("memory delete loop %x/%x, statistics%s\n",
		    (uint_t)mhp->mh_transit.trl_spans->mds_base,
		    (uint_t)mhp->mh_transit.trl_spans->mds_npgs,
		    (mhp->mh_cancel ? " (cancelled)" : ""));
		printf("\t%8u nloop\n", mhp->mh_delstat.nloop);
		printf("\t%8u need_free\n", mhp->mh_delstat.need_free);
		printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop);
		printf("\t%8u free_low\n", mhp->mh_delstat.free_low);
		printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed);
		printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck);
		printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget);
		printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail);
		printf("\t%8u nfree\n", mhp->mh_delstat.nfree);
		printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc);
		printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail);
		printf("\t%8u already_done\n", mhp->mh_delstat.already_done);
		printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree);
		printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked);
		printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc);
		printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl);
		printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc);
		printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy);
		printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage);
		printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim);
		printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay);
		printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail);
		printf("\t%8u retired\n", mhp->mh_delstat.retired);
		printf("\t%8u toxic\n", mhp->mh_delstat.toxic);
		printf("\t%8u failing\n", mhp->mh_delstat.failing);
		printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic);
		printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic);
		printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail);
		printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail);
		tmp = mhp->mh_delstat.nticks_total / hz;  /* seconds */
		printf(
		    "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60);

		tmp = mhp->mh_delstat.nticks_pgrp / hz;  /* seconds */
		printf(
		    "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60);
	}
}
#endif /* MEM_DEL_STATS */
struct mem_callback {
	kphysm_setup_vector_t	*vec;
	void			*arg;
};

#define	NMEMCALLBACKS		100

static struct mem_callback mem_callbacks[NMEMCALLBACKS];
static uint_t nmemcallbacks;
static krwlock_t mem_callback_rwlock;

int
kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
{
	uint_t i, found;

	/*
	 * This test will become more complicated when the version must
	 * change.
	 */
	if (vec->version != KPHYSM_SETUP_VECTOR_VERSION)
		return (EINVAL);

	if (vec->post_add == NULL || vec->pre_del == NULL ||
	    vec->post_del == NULL)
		return (EINVAL);

	rw_enter(&mem_callback_rwlock, RW_WRITER);
	for (i = 0, found = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec == NULL && found == 0)
			found = i + 1;
		if (mem_callbacks[i].vec == vec &&
		    mem_callbacks[i].arg == arg) {
#ifdef DEBUG
			/* Catch this in DEBUG kernels. */
			cmn_err(CE_WARN, "kphysm_setup_func_register"
			    "(0x%p, 0x%p) duplicate registration from 0x%p",
			    (void *)vec, arg, (void *)caller());
#endif /* DEBUG */
			rw_exit(&mem_callback_rwlock);
			return (EEXIST);
		}
	}
	if (found != 0) {
		i = found - 1;
	} else {
		ASSERT(nmemcallbacks < NMEMCALLBACKS);
		if (nmemcallbacks == NMEMCALLBACKS) {
			rw_exit(&mem_callback_rwlock);
			return (ENOMEM);
		}
		i = nmemcallbacks++;
	}
	mem_callbacks[i].vec = vec;
	mem_callbacks[i].arg = arg;
	rw_exit(&mem_callback_rwlock);
	return (0);
}
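/*
 * Illustrative registration (hypothetical client; the "xx_" names are
 * invented).  The callback signatures follow the indirect calls made
 * by kphysm_setup_post_add(), kphysm_setup_pre_del() and
 * kphysm_setup_post_del() below, and the member order assumes the
 * vector fields appear in the order tested above; a non-zero return
 * from a pre_del callback is OR-ed into the value that
 * kphysm_setup_pre_del() returns to its caller.
 *
 *	static void
 *	xx_post_add(void *arg, pgcnt_t delta_pages);
 *
 *	static int
 *	xx_pre_del(void *arg, pgcnt_t delta_pages);
 *
 *	static void
 *	xx_post_del(void *arg, pgcnt_t delta_pages, int cancelled);
 *
 *	static kphysm_setup_vector_t xx_vec = {
 *		KPHYSM_SETUP_VECTOR_VERSION,
 *		xx_post_add,
 *		xx_pre_del,
 *		xx_post_del
 *	};
 *
 *	int error = kphysm_setup_func_register(&xx_vec, NULL);
 */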
void
kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
{
	uint_t i;

	rw_enter(&mem_callback_rwlock, RW_WRITER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec == vec &&
		    mem_callbacks[i].arg == arg) {
			mem_callbacks[i].vec = NULL;
			mem_callbacks[i].arg = NULL;
			if (i == (nmemcallbacks - 1))
				nmemcallbacks--;
			break;
		}
	}
	rw_exit(&mem_callback_rwlock);
}
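/*
 * Unregistering leaves a NULL hole in mem_callbacks[] unless the
 * entry was the last one in use; kphysm_setup_func_register() above
 * reuses the first such hole (via 'found') before growing
 * nmemcallbacks.
 */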
static void
kphysm_setup_post_add(pgcnt_t delta_pages)
{
	uint_t i;

	rw_enter(&mem_callback_rwlock, RW_READER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			(*mem_callbacks[i].vec->post_add)
			    (mem_callbacks[i].arg, delta_pages);
		}
	}
	rw_exit(&mem_callback_rwlock);
}

/*
 * Note the locking between pre_del and post_del: the reader lock is
 * acquired in pre_del and held across the two calls (post_del drops
 * it) to stop the set of registered functions from changing.
 */

static int
kphysm_setup_pre_del(pgcnt_t delta_pages)
{
	uint_t i;
	int ret;
	int aret;

	ret = 0;
	rw_enter(&mem_callback_rwlock, RW_READER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			aret = (*mem_callbacks[i].vec->pre_del)
			    (mem_callbacks[i].arg, delta_pages);
			ret |= aret;
		}
	}

	return (ret);
}

static void
kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled)
{
	uint_t i;

	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			(*mem_callbacks[i].vec->post_del)
			    (mem_callbacks[i].arg, delta_pages, cancelled);
		}
	}
	rw_exit(&mem_callback_rwlock);
}

static int
kphysm_split_memseg(
	pfn_t base,
	pgcnt_t npgs)
{
	struct memseg *seg;
	struct memseg **segpp;
	pgcnt_t size_low, size_high;
	struct memseg *seg_low, *seg_mid, *seg_high;

	/*
	 * Lock the memsegs list against other updates now.
	 */
	memsegs_lock(1);

	/*
	 * Find boot time memseg that wholly covers this area.
	 */

	/* First find the memseg with page 'base' in it. */
	for (segpp = &memsegs; (seg = *segpp) != NULL;
	    segpp = &((*segpp)->next)) {
		if (base >= seg->pages_base && base < seg->pages_end)
			break;
	}
	if (seg == NULL) {
		memsegs_unlock(1);
		return (0);
	}
	if (memseg_is_dynamic(seg, (pfn_t *)NULL)) {
		memsegs_unlock(1);
		return (0);
	}
	if ((base + npgs) > seg->pages_end) {
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Work out the size of the two segments that will
	 * surround the new segment, one for low address
	 * and one for high.
	 */
	ASSERT(base >= seg->pages_base);
	size_low = base - seg->pages_base;
	ASSERT(seg->pages_end >= (base + npgs));
	size_high = seg->pages_end - (base + npgs);

	/*
	 * Sanity check.
	 */
	if ((size_low + size_high) == 0) {
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Allocate the new structures. The old memseg will not be freed
	 * as there may be a reference to it.
	 */
	seg_low = NULL;
	seg_high = NULL;

	if (size_low != 0) {
		seg_low = kmem_cache_alloc(memseg_cache, KM_SLEEP);
		bzero(seg_low, sizeof (struct memseg));
	}

	seg_mid = kmem_cache_alloc(memseg_cache, KM_SLEEP);
	bzero(seg_mid, sizeof (struct memseg));

	if (size_high != 0) {
		seg_high = kmem_cache_alloc(memseg_cache, KM_SLEEP);
		bzero(seg_high, sizeof (struct memseg));
	}

	/*
	 * All allocation done now.
	 */
	if (size_low != 0) {
		seg_low->pages = seg->pages;
		seg_low->epages = seg_low->pages + size_low;
		seg_low->pages_base = seg->pages_base;
		seg_low->pages_end = seg_low->pages_base + size_low;
		seg_low->next = seg_mid;
	}
	if (size_high != 0) {
		seg_high->pages = seg->epages - size_high;
		seg_high->epages = seg_high->pages + size_high;
		seg_high->pages_base = seg->pages_end - size_high;
		seg_high->pages_end = seg_high->pages_base + size_high;
		seg_high->next = seg->next;
	}

	seg_mid->pages = seg->pages + size_low;
	seg_mid->pages_base = seg->pages_base + size_low;
	seg_mid->epages = seg->epages - size_high;
	seg_mid->pages_end = seg->pages_end - size_high;
	seg_mid->next = (seg_high != NULL) ? seg_high : seg->next;

	/*
	 * Update hat_kpm specific info of all involved memsegs and
	 * allow hat_kpm specific global chain updates.
	 */
	hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high);
	/*
	 * At this point we have two equivalent memseg sub-chains,
	 * seg and seg_low/seg_mid/seg_high, which both chain on to
	 * the same place in the global chain. By re-writing the pointer
	 * in the previous element we switch atomically from using the
	 * old (seg) to the new.
	 */
	*segpp = (seg_low != NULL) ? seg_low : seg_mid;

	membar_enter();

	build_pfn_hash();
	memsegs_unlock(1);

	/*
	 * We leave the old segment, 'seg', intact as there may be
	 * references to it. Also, as the value of total_pages has not
	 * changed and the memsegs list is effectively the same when
	 * accessed via the old or the new pointer, we do not have to
	 * cause pageout_scanner() to re-evaluate its hand pointers.
	 *
	 * We currently do not re-use or reclaim the page_t memory.
	 * If we do, then this may have to change.
	 */

	mutex_enter(&memseg_lists_lock);
	seg->lnext = memseg_edit_junk;
	memseg_edit_junk = seg;
	mutex_exit(&memseg_lists_lock);

	return (1);
}
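/*
 * Pictorially, splitting [base, base + npgs) out of a boot-time
 * memseg:
 *
 *	before:  |<--------------------- seg --------------------->|
 *	         pages_base      base           base+npgs          pages_end
 *	after:   |<-- seg_low -->|<--- seg_mid --->|<-- seg_high -->|
 *
 * seg_low or seg_high is omitted when its span would be empty.
 */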
/*
 * The memsegs lock is only taken when modifying the memsegs list
 * and rebuilding the pfn hash table (after boot).
 * No lock is needed for read as memseg structures are never
 * de-allocated and the pointer linkage is never updated until the
 * memseg is ready.
 */
krwlock_t memsegslock;

void
memsegs_lock(int writer)
{
	rw_enter(&memsegslock, writer ? RW_WRITER : RW_READER);
}

/*ARGSUSED*/
void
memsegs_unlock(int writer)
{
	rw_exit(&memsegslock);
}

/*
 * memlist (phys_install, phys_avail) locking.
 */

/*
 * A read/write lock might be better here.
 */
static kmutex_t memlists_mutex;

void
memlist_read_lock()
{
	mutex_enter(&memlists_mutex);
}

void
memlist_read_unlock()
{
	mutex_exit(&memlists_mutex);
}

void
memlist_write_lock()
{
	mutex_enter(&memlists_mutex);
}

void
memlist_write_unlock()
{
	mutex_exit(&memlists_mutex);
}

/*
 * The sfmmu hat layer (e.g.) accesses some parts of the memseg
 * structure using physical addresses. Therefore a kmem_cache is
 * used with KMC_NOHASH to avoid page crossings within a memseg
 * structure. KMC_NOHASH requires that no external (outside of
 * slab) information is allowed. This, in turn, implies that the
 * cache's slabsize must be exactly a single page, since per-slab
 * information (e.g. the freelist for the slab) is kept at the
 * end of the slab, where it is easy to locate. This should be
 * changed when a more obvious kmem_cache interface/flag becomes
 * available.
 */
void
mem_config_init()
{
	memseg_cache = kmem_cache_create("memseg_cache",
	    sizeof (struct memseg), 0, NULL, NULL, NULL, NULL,
	    static_arena, KMC_NOHASH);
}
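/*
 * A compile-time check one could add here (sketch only, assuming the
 * CTASSERT() macro from <sys/debug.h>): since KMC_NOHASH implies a
 * single-page slab, a struct memseg must at minimum fit within one
 * page.
 *
 *	CTASSERT(sizeof (struct memseg) <= PAGESIZE);
 */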