/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/machsystm.h>	/* for page_freelist_coalesce() */
#include <sys/errno.h>
#include <sys/memnode.h>
#include <sys/memlist.h>
#include <sys/memlist_impl.h>
#include <sys/tuneable.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/vm.h>
#include <sys/callb.h>
#include <sys/memlist_plat.h>	/* for installed_top_size() */
#include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
#include <sys/dumphdr.h>	/* for dump_resize() */
#include <sys/atomic.h>		/* for use in stats collection */
#include <sys/rwlock.h>
#include <sys/cpuvar.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/page.h>
#include <vm/vm_dep.h>
#define	SUNDDI_IMPL		/* so sunddi.h will not redefine splx() et al */
#include <sys/sunddi.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <sys/lgrp.h>
#include <sys/ddi.h>
#include <sys/modctl.h>

extern struct memlist *phys_avail;

extern void mem_node_add(pfn_t, pfn_t);
extern void mem_node_del(pfn_t, pfn_t);

extern uint_t page_ctrs_adjust(int);
static void kphysm_setup_post_add(pgcnt_t);
static int kphysm_setup_pre_del(pgcnt_t);
static void kphysm_setup_post_del(pgcnt_t, int);

static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs);

static int delspan_reserve(pfn_t, pgcnt_t);
static void delspan_unreserve(pfn_t, pgcnt_t);

static kmutex_t memseg_lists_lock;
static struct memseg *memseg_va_avail;
static struct memseg *memseg_delete_junk;
static struct memseg *memseg_edit_junk;
void memseg_remap_init(void);
static void memseg_remap_to_dummy(caddr_t, pgcnt_t);
static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t);
static struct memseg *memseg_reuse(pgcnt_t);

static struct kmem_cache *memseg_cache;

/*
 * Add a chunk of memory to the system.  page_t's for this memory
 * are allocated in the first few pages of the chunk.
 * base: starting PAGESIZE page of new memory.
 * npgs: length in PAGESIZE pages.
 *
 * Adding mem this way doesn't increase the size of the hash tables;
 * growing them would be too hard.  This should be OK, but adding memory
 * dynamically most likely means more hash misses, since the tables will
 * be smaller than they otherwise would be.
 */
int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
	page_t *pp;
	page_t *opp, *oepp;
	struct memseg *seg;
	uint64_t avmem;
	pfn_t pfn;
	pfn_t pt_base = base;
	pgcnt_t tpgs = npgs;
	pgcnt_t metapgs;
	int exhausted;
	pfn_t pnum;
	int mnode;
	caddr_t vaddr;
	int reuse;
	int mlret;
	void *mapva;
	pgcnt_t nkpmpgs = 0;
	offset_t kpm_pages_off;

	cmn_err(CE_CONT,
	    "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n",
	    npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT);

	/*
	 * Add this span in the delete list to prevent interactions.
	 */
	if (!delspan_reserve(base, npgs)) {
		return (KPHYSM_ESPAN);
	}
	/*
	 * Check to see if any of the memory span has been added
	 * by trying an add to the installed memory list. This
	 * forms the interlocking process for add.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	if (mlret == MEML_SPANOP_OK)
		installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();

	if (mlret != MEML_SPANOP_OK) {
		if (mlret == MEML_SPANOP_EALLOC) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		} else
		if (mlret == MEML_SPANOP_ESPAN) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ESPAN);
		} else {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		}
	}

	/*
	 * We store the page_t's for this new memory in the first
	 * few pages of the chunk. Here, we go and get'em ...
	 */

	/*
	 * The expression after the '-' gives the number of pages
	 * that will fit in the new memory based on a requirement
	 * of (PAGESIZE + sizeof (page_t)) bytes per page.
	 */
	metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) /
	    (PAGESIZE + sizeof (page_t)));

	npgs -= metapgs;
	base += metapgs;

	ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);

	exhausted = (metapgs == 0 || npgs == 0);
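
	/*
	 * Worked example (illustrative only; assumes an 8 KB PAGESIZE and
	 * a 120-byte page_t, neither of which is guaranteed): adding
	 * 131072 pages (1 GB) gives
	 *
	 *	metapgs = 131072 - (131072 * 8192) / (8192 + 120)
	 *	        = 131072 - 129179 = 1893
	 *
	 * pages of metadata and npgs = 129179 usable pages; the ASSERT
	 * above holds since btopr(129179 * 120) == 1893.
	 */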

	if (kpm_enable && !exhausted) {
		pgcnt_t start, end, nkpmpgs_prelim;
		size_t ptsz;

		/*
		 * A viable kpm large page mapping must not overlap two
		 * dynamic memsegs. Therefore the total size is checked
		 * to be at least kpm_pgsz and also whether start and end
		 * points are at least kpm_pgsz aligned.
		 */
		if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) ||
		    pmodkpmp(base + npgs)) {

			kphysm_addmem_error_undospan(pt_base, tpgs);

			/*
			 * There is no specific error code for violating
			 * kpm granularity constraints.
			 */
			return (KPHYSM_ENOTVIABLE);
		}

		start = kpmptop(ptokpmp(base));
		end = kpmptop(ptokpmp(base + npgs));
		nkpmpgs_prelim = ptokpmp(end - start);
		ptsz = npgs * sizeof (page_t);
		metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ);
		exhausted = (tpgs <= metapgs);
		if (!exhausted) {
			npgs = tpgs - metapgs;
			base = pt_base + metapgs;

			/* final nkpmpgs */
			start = kpmptop(ptokpmp(base));
			nkpmpgs = ptokpmp(end - start);
			kpm_pages_off = ptsz +
			    (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ;
		}
	}
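
	/*
	 * Illustrative numbers (assuming, for example, a 4 MB kpm page
	 * size, i.e. 512 pages of 8 KB; the actual value is platform
	 * dependent): the span must cover at least 512 pages, and both
	 * pt_base and base + npgs must be multiples of 512, otherwise
	 * the add fails above with KPHYSM_ENOTVIABLE.
	 */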

	/*
	 * Is memory area supplied too small?
	 */
	if (exhausted) {
		kphysm_addmem_error_undospan(pt_base, tpgs);

		/*
		 * There is no specific error code for 'too small'.
		 */
		return (KPHYSM_ERESOURCE);
	}

	/*
	 * We may re-use a previously allocated VA space for the page_ts
	 * eventually, but we need to initialize and lock the pages first.
	 */

	/*
	 * Get an address in the kernel address map, map
	 * the page_t pages and see if we can touch them.
	 */

	mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP);
	if (mapva == NULL) {
		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't allocate VA for page_ts");

		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}
	pp = mapva;

	if (physmax < (pt_base + tpgs))
		physmax = (pt_base + tpgs);

	/*
	 * In the remapping code we map one page at a time so we must do
	 * the same here to match mapping sizes.
	 */
	pfn = pt_base;
	vaddr = (caddr_t)pp;
	for (pnum = 0; pnum < metapgs; pnum++) {
		hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
		pfn++;
		vaddr += ptob(1);
	}

	if (ddi_peek32((dev_info_t *)NULL,
	    (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) {

		cmn_err(CE_PANIC, "kphysm_add_memory_dynamic:"
		    " Can't access pp array at 0x%p [phys 0x%lx]",
		    (void *)pp, pt_base);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));

		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_EFAULT);
	}

	/*
	 * Add this memory slice to its memory node translation.
	 *
	 * Note that right now, each node may have only one slice;
	 * this may change with COD or in larger SSM systems with
	 * nested latency groups, so we must not assume that the
	 * node does not yet exist.
	 */
	pnum = base + npgs - 1;
	mem_node_add_slice(base, pnum);

	/*
	 * Allocate or resize page counters as necessary to accommodate
	 * the increase in memory pages.
	 */
	mnode = PFN_2_MEM_NODE(pnum);
	if (page_ctrs_adjust(mnode) != 0) {

		mem_node_pre_del_slice(base, pnum);
		mem_node_post_del_slice(base, pnum, 0);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));

		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}

	/*
	 * Update the phys_avail memory list.
	 * The phys_install list was done at the start.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT,
	    (uint64_t)(npgs) << PAGESHIFT, &phys_avail);
	ASSERT(mlret == MEML_SPANOP_OK);

	memlist_write_unlock();

	/* See if we can find a memseg to re-use. */
	seg = memseg_reuse(metapgs);

	reuse = (seg != NULL);

	/*
	 * Initialize the memseg structure representing this memory
	 * and add it to the existing list of memsegs. Do some basic
	 * initialization and add the memory to the system.
	 * In order to prevent lock deadlocks, the add_physmem()
	 * code is repeated here, but split into several stages.
	 */
	if (seg == NULL) {
		seg = kmem_cache_alloc(memseg_cache, KM_SLEEP);
		bzero(seg, sizeof (struct memseg));
		seg->msegflags = MEMSEG_DYNAMIC;
		seg->pages = pp;
	} else {
		/*EMPTY*/
		ASSERT(seg->msegflags & MEMSEG_DYNAMIC);
	}

	seg->epages = seg->pages + npgs;
	seg->pages_base = base;
	seg->pages_end = base + npgs;

	/*
	 * Initialize metadata. The page_ts are set to locked state
	 * ready to be freed.
	 */
	bzero((caddr_t)pp, ptob(metapgs));

	pfn = seg->pages_base;
	/* Save the original pp base in case we reuse a memseg. */
	opp = pp;
	oepp = opp + npgs;
	for (pp = opp; pp < oepp; pp++) {
		pp->p_pagenum = pfn;
		pfn++;
		page_iolock_init(pp);
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		pp->p_offset = (u_offset_t)-1;
	}

	if (reuse) {
		/* Remap our page_ts to the re-used memseg VA space. */
		pfn = pt_base;
		vaddr = (caddr_t)seg->pages;
		for (pnum = 0; pnum < metapgs; pnum++) {
			hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST);
			pfn++;
			vaddr += ptob(1);
		}

		hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
	}

	hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off);

	memsegs_lock(1);

	/*
	 * The new memseg is inserted at the beginning of the list.
	 * Not only does this save searching for the tail, but in the
	 * case of a re-used memseg, it solves the problem of what
	 * happens if some process still has a pointer to the
	 * memseg and follows the next pointer to continue traversing
	 * the memsegs list.
	 */

	hat_kpm_addmem_mseg_insert(seg);

	seg->next = memsegs;
	membar_producer();

	hat_kpm_addmem_memsegs_update(seg);

	memsegs = seg;

	build_pfn_hash();

	total_pages += npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	PLCNT_MODIFY_MAX(seg->pages_base, (long)npgs);

	/*
	 * Free the pages outside the lock to avoid locking loops.
	 */
	for (pp = seg->pages; pp < seg->epages; pp++) {
		page_free(pp, 1);
	}
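
	/*
	 * Illustrative reader-side sketch (not code from this file): the
	 * head insertion plus membar_producer() ordering above is what
	 * lets lookups walk the list without taking memsegs_lock():
	 *
	 *	for (seg = memsegs; seg != NULL; seg = seg->next)
	 *		if (pfn >= seg->pages_base && pfn < seg->pages_end)
	 *			break;
	 *
	 * A reader that loads the new memsegs pointer is guaranteed to
	 * see the fully initialized memseg, including its next field.
	 */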

	/*
	 * Now that we've updated the appropriate memory lists we
	 * need to reset a number of globals, since we've increased memory.
	 * Several have already been updated for us as noted above. The
	 * globals we're interested in at this point are:
	 *	physmax - highest page frame number.
	 *	physinstalled - number of pages currently installed (done earlier)
	 *	maxmem - max free pages in the system
	 *	physmem - physical memory pages available
	 *	availrmem - real memory available
	 */

	mutex_enter(&freemem_lock);
	maxmem += npgs;
	physmem += npgs;
	availrmem += npgs;
	availrmem_initial += npgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	page_freelist_coalesce_all(mnode);

	kphysm_setup_post_add(npgs);

	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	delspan_unreserve(pt_base, tpgs);
	return (KPHYSM_OK);		/* Successfully added system memory */

}

/*
 * There are various error conditions in kphysm_add_memory_dynamic()
 * which require a rollback of already changed global state.
 */
static void
kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs)
{
	int mlret;

	/* Unreserve memory span. */
	memlist_write_lock();

	mlret = memlist_delete_span(
	    (uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	ASSERT(mlret == MEML_SPANOP_OK);
	phys_install_has_changed();
	installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();
	delspan_unreserve(pt_base, tpgs);
}
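
/*
 * Hypothetical caller sketch (not part of this file): a DR driver that
 * has just configured a new board might hand its physical span to the
 * kernel like this, with pa and len describing the new memory in bytes:
 *
 *	int ret = kphysm_add_memory_dynamic(btop(pa), btop(len));
 *	if (ret != KPHYSM_OK)
 *		cmn_err(CE_WARN, "!memory add failed (%d)", ret);
 */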

/*
 * Only return an available memseg of exactly the right size.
 * When the meta data area has its own virtual address space
 * we will need to manage this more carefully and do best fit
 * allocations, possibly splitting an available area.
 */
static struct memseg *
memseg_reuse(pgcnt_t metapgs)
{
	struct memseg **segpp, *seg;

	mutex_enter(&memseg_lists_lock);

	segpp = &memseg_va_avail;
	for (; (seg = *segpp) != NULL; segpp = &seg->lnext) {
		caddr_t end;

		if (kpm_enable)
			end = hat_kpm_mseg_reuse(seg);
		else
			end = (caddr_t)seg->epages;

		if (btopr(end - (caddr_t)seg->pages) == metapgs) {
			*segpp = seg->lnext;
			seg->lnext = NULL;
			break;
		}
	}
	mutex_exit(&memseg_lists_lock);

	return (seg);
}

static uint_t handle_gen;

struct memdelspan {
	struct memdelspan *mds_next;
	pfn_t mds_base;
	pgcnt_t mds_npgs;
	uint_t *mds_bitmap;
	uint_t *mds_bitmap_retired;
};

#define	NBPBMW		(sizeof (uint_t) * NBBY)
#define	MDS_BITMAPBYTES(MDSP) \
	((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t))
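
/*
 * Example: with a 4-byte uint_t, NBPBMW is 32 bits per bitmap word, so
 * a delspan of 1000 pages needs MDS_BITMAPBYTES == ((1000 + 31) / 32)
 * * 4 == 128 bytes, i.e. 32 words covering 1024 page bits.
 */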

struct transit_list {
	struct transit_list	*trl_next;
	struct memdelspan	*trl_spans;
	int			trl_collect;
};

struct transit_list_head {
	kmutex_t		trh_lock;
	struct transit_list	*trh_head;
};

static struct transit_list_head transit_list_head;

struct mem_handle;
static void transit_list_collect(struct mem_handle *, int);
static void transit_list_insert(struct transit_list *);
static void transit_list_remove(struct transit_list *);

#ifdef DEBUG
#define	MEM_DEL_STATS
#endif /* DEBUG */

#ifdef MEM_DEL_STATS
static int mem_del_stat_print = 0;
struct mem_del_stat {
	uint_t	nloop;
	uint_t	need_free;
	uint_t	free_loop;
	uint_t	free_low;
	uint_t	free_failed;
	uint_t	ncheck;
	uint_t	nopaget;
	uint_t	lockfail;
	uint_t	nfree;
	uint_t	nreloc;
	uint_t	nrelocfail;
	uint_t	already_done;
	uint_t	first_notfree;
	uint_t	npplocked;
	uint_t	nlockreloc;
	uint_t	nnorepl;
	uint_t	nmodreloc;
	uint_t	ndestroy;
	uint_t	nputpage;
	uint_t	nnoreclaim;
	uint_t	ndelay;
	uint_t	demotefail;
	uint64_t nticks_total;
	uint64_t nticks_pgrp;
	uint_t	retired;
	uint_t	toxic;
	uint_t	failing;
	uint_t	modtoxic;
	uint_t	npplkdtoxic;
	uint_t	gptlmodfail;
	uint_t	gptllckfail;
};
/*
 * The stat values are only incremented in the delete thread
 * so no locking or atomics are required.
 */
#define	MDSTAT_INCR(MHP, FLD)	(MHP)->mh_delstat.FLD++
#define	MDSTAT_TOTAL(MHP, ntck)	((MHP)->mh_delstat.nticks_total += (ntck))
#define	MDSTAT_PGRP(MHP, ntck)	((MHP)->mh_delstat.nticks_pgrp += (ntck))
static void mem_del_stat_print_func(struct mem_handle *);
#define	MDSTAT_PRINT(MHP)	mem_del_stat_print_func((MHP))
#else /* MEM_DEL_STATS */
#define	MDSTAT_INCR(MHP, FLD)
#define	MDSTAT_TOTAL(MHP, ntck)
#define	MDSTAT_PGRP(MHP, ntck)
#define	MDSTAT_PRINT(MHP)
#endif /* MEM_DEL_STATS */
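
/*
 * Example: on a kernel built with MEM_DEL_STATS (i.e. DEBUG),
 * MDSTAT_INCR(mhp, nreloc) expands to mhp->mh_delstat.nreloc++; on a
 * non-DEBUG kernel it expands to nothing, so callers need no #ifdefs
 * of their own.
 */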

typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING,
	MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t;

/*
 * mh_mutex must be taken to examine or change mh_exthandle and mh_state.
 * The mutex may not be required for other fields, dependent on mh_state.
 */
struct mem_handle {
	kmutex_t	mh_mutex;
	struct mem_handle *mh_next;
	memhandle_t	mh_exthandle;
	mhnd_state_t	mh_state;
	struct transit_list mh_transit;
	pgcnt_t		mh_phys_pages;
	pgcnt_t		mh_vm_pages;
	pgcnt_t		mh_hold_todo;
	void		(*mh_delete_complete)(void *, int error);
	void		*mh_delete_complete_arg;
	volatile uint_t mh_cancel;
	volatile uint_t mh_dr_aio_cleanup_cancel;
	volatile uint_t mh_aio_cleanup_done;
	kcondvar_t	mh_cv;
	kthread_id_t	mh_thread_id;
	page_t		*mh_deleted;	/* link through p_next */
#ifdef MEM_DEL_STATS
	struct mem_del_stat mh_delstat;
#endif /* MEM_DEL_STATS */
};

static struct mem_handle *mem_handle_head;
static kmutex_t mem_handle_list_mutex;

static struct mem_handle *
kphysm_allocate_mem_handle()
{
	struct mem_handle *mhp;

	mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP);
	mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&mem_handle_list_mutex);
	mutex_enter(&mhp->mh_mutex);
	/* handle_gen is protected by list mutex. */
	mhp->mh_exthandle = (memhandle_t)(uintptr_t)(++handle_gen);
	mhp->mh_next = mem_handle_head;
	mem_handle_head = mhp;
	mutex_exit(&mem_handle_list_mutex);

	return (mhp);
}
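
/*
 * Note (illustrative): the external handle is nothing more than the
 * incremented generation count, so the first handle ever allocated
 * compares equal to (memhandle_t)(uintptr_t)1; lookups match on this
 * value alone.
 */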

static void
kphysm_free_mem_handle(struct mem_handle *mhp)
{
	struct mem_handle **mhpp;

	ASSERT(mutex_owned(&mhp->mh_mutex));
	ASSERT(mhp->mh_state == MHND_FREE);
	/*
	 * Exit the mutex to preserve locking order. This is OK
	 * here as once in the FREE state, the handle cannot
	 * be found by a lookup.
	 */
	mutex_exit(&mhp->mh_mutex);

	mutex_enter(&mem_handle_list_mutex);
	mhpp = &mem_handle_head;
	while (*mhpp != NULL && *mhpp != mhp)
		mhpp = &(*mhpp)->mh_next;
	ASSERT(*mhpp == mhp);
	/*
	 * No need to lock the handle (mh_mutex) as only
	 * mh_next is changing and this is the only thread that
	 * can be referencing mhp.
	 */
	*mhpp = mhp->mh_next;
	mutex_exit(&mem_handle_list_mutex);

	mutex_destroy(&mhp->mh_mutex);
	kmem_free(mhp, sizeof (struct mem_handle));
}

/*
 * This function finds the internal mem_handle corresponding to an
 * external handle and returns it with the mh_mutex held.
 */
static struct mem_handle *
kphysm_lookup_mem_handle(memhandle_t handle)
{
	struct mem_handle *mhp;

	mutex_enter(&mem_handle_list_mutex);
	for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) {
		if (mhp->mh_exthandle == handle) {
			mutex_enter(&mhp->mh_mutex);
			/*
			 * The state of the handle could have been changed
			 * by kphysm_del_release() while waiting for mh_mutex.
			 */
			if (mhp->mh_state == MHND_FREE) {
				mutex_exit(&mhp->mh_mutex);
				continue;
			}
			break;
		}
	}
	mutex_exit(&mem_handle_list_mutex);
	return (mhp);
}
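
/*
 * Illustrative caller pattern (the same shape used by kphysm_del_span()
 * and kphysm_del_release() below): since the lookup returns with
 * mh_mutex held, every caller must drop the mutex on all paths:
 *
 *	if ((mhp = kphysm_lookup_mem_handle(handle)) == NULL)
 *		return (KPHYSM_EHANDLE);
 *	... examine or update the handle ...
 *	mutex_exit(&mhp->mh_mutex);
 */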

int
kphysm_del_gethandle(memhandle_t *xmhp)
{
	struct mem_handle *mhp;

	mhp = kphysm_allocate_mem_handle();
	/*
	 * The handle is allocated using KM_SLEEP, so cannot fail.
	 * If the implementation is changed, the correct error to return
	 * here would be KPHYSM_ENOHANDLES.
	 */
	ASSERT(mhp->mh_state == MHND_FREE);
	mhp->mh_state = MHND_INIT;
	*xmhp = mhp->mh_exthandle;
	mutex_exit(&mhp->mh_mutex);
	return (KPHYSM_OK);
}

static int
overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2)
{
	pfn_t e1, e2;

	e1 = b1 + l1;
	e2 = b2 + l2;

	return (!(b2 >= e1 || b1 >= e2));
}
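
/*
 * Example: overlapping(10, 5, 14, 2) returns 1 because [10, 15) and
 * [14, 16) share pfn 14, while overlapping(10, 5, 15, 2) returns 0;
 * the intervals are half-open.
 */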

static int can_remove_pgs(pgcnt_t);

static struct memdelspan *
span_to_install(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	uint64_t address, size, thislen;
	struct memlist *mlp;

	mdsp_new = NULL;

	address = (uint64_t)base << PAGESHIFT;
	size = (uint64_t)npgs << PAGESHIFT;
	while (size != 0) {
		memlist_read_lock();
		for (mlp = phys_install; mlp != NULL; mlp = mlp->next) {
			if (address >= (mlp->address + mlp->size))
				continue;
			if ((address + size) > mlp->address)
				break;
		}
		if (mlp == NULL) {
			address += size;
			size = 0;
			thislen = 0;
		} else {
			if (address < mlp->address) {
				size -= (mlp->address - address);
				address = mlp->address;
			}
			ASSERT(address >= mlp->address);
			if ((address + size) > (mlp->address + mlp->size)) {
				thislen = mlp->size - (address - mlp->address);
			} else {
				thislen = size;
			}
		}
		memlist_read_unlock();
		/* TODO: phys_install could change now */
		if (thislen == 0)
			continue;
		mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
		mdsp->mds_base = btop(address);
		mdsp->mds_npgs = btop(thislen);
		mdsp->mds_next = mdsp_new;
		mdsp_new = mdsp;
		address += thislen;
		size -= thislen;
	}
	return (mdsp_new);
}
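
/*
 * Example (illustrative): with phys_install covering pfns [0, 0x80000)
 * and [0x100000, 0x180000), span_to_install(0x7f000, 0x82000) yields
 * two memdelspans, [0x7f000, 0x80000) and [0x100000, 0x101000); the
 * hole between them is simply not represented. New spans are
 * prepended, so the list is in reverse order of discovery.
 */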

static void
free_delspans(struct memdelspan *mdsp)
{
	struct memdelspan *amdsp;

	while ((amdsp = mdsp) != NULL) {
		mdsp = amdsp->mds_next;
		kmem_free(amdsp, sizeof (struct memdelspan));
	}
}

/*
 * Concatenate lists. No list ordering is required.
 */

static void
delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp)
{
	while (*mdspp != NULL)
		mdspp = &(*mdspp)->mds_next;

	*mdspp = mdsp;
}

/*
 * Given a new list of delspans, check there is no overlap with
 * all existing span activity (add or delete) and then concatenate
 * the new spans to the given list.
 * Return 1 for OK, 0 if overlapping.
 */
static int
delspan_insert(
	struct transit_list *my_tlp,
	struct memdelspan *mdsp_new)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);
	ASSERT(mdsp_new != NULL);

	ret = 1;
	mutex_enter(&trh->trh_lock);
	/* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */
	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			struct memdelspan *nmdsp;

			for (nmdsp = mdsp_new; nmdsp != NULL;
			    nmdsp = nmdsp->mds_next) {
				if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
				    nmdsp->mds_base, nmdsp->mds_npgs)) {
					ret = 0;
					goto done;
				}
			}
		}
	}
done:
	if (ret != 0) {
		if (my_tlp->trl_spans == NULL)
			transit_list_insert(my_tlp);
		delspan_concat(&my_tlp->trl_spans, mdsp_new);
	}
	mutex_exit(&trh->trh_lock);
	return (ret);
}

static void
delspan_remove(
	struct transit_list *my_tlp,
	pfn_t base,
	pgcnt_t npgs)
{
	struct transit_list_head *trh;
	struct memdelspan *mdsp;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);

	mutex_enter(&trh->trh_lock);
	if ((mdsp = my_tlp->trl_spans) != NULL) {
		if (npgs == 0) {
			my_tlp->trl_spans = NULL;
			free_delspans(mdsp);
			transit_list_remove(my_tlp);
		} else {
			struct memdelspan **prv;

			prv = &my_tlp->trl_spans;
			while (mdsp != NULL) {
				pfn_t p_end;

				p_end = mdsp->mds_base + mdsp->mds_npgs;
				if (mdsp->mds_base >= base &&
				    p_end <= (base + npgs)) {
					*prv = mdsp->mds_next;
					mdsp->mds_next = NULL;
					free_delspans(mdsp);
				} else {
					prv = &mdsp->mds_next;
				}
				mdsp = *prv;
			}
			if (my_tlp->trl_spans == NULL)
				transit_list_remove(my_tlp);
		}
	}
	mutex_exit(&trh->trh_lock);
}

/*
 * Reserve interface for add to stop delete before add finished.
 * This list is only accessed through the delspan_insert/remove
 * functions and so is fully protected by the mutex in struct
 * transit_list_head.
 */

static struct transit_list reserve_transit;

static int
delspan_reserve(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	int ret;

	mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
	mdsp->mds_base = base;
	mdsp->mds_npgs = npgs;
	if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) {
		free_delspans(mdsp);
	}
	return (ret);
}

static void
delspan_unreserve(pfn_t base, pgcnt_t npgs)
{
	delspan_remove(&reserve_transit, base, npgs);
}
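
/*
 * Usage note (from this file): kphysm_add_memory_dynamic() brackets an
 * add with delspan_reserve(base, npgs) ... delspan_unreserve(pt_base,
 * tpgs), so any kphysm_del_span() that overlaps a range still being
 * added fails with KPHYSM_EBUSY until the add has finished.
 */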

/*
 * Return whether memseg was created by kphysm_add_memory_dynamic().
 * If this is the case and startp is non-NULL, also return the start
 * pfn of the meta data via startp.
 */
static int
memseg_is_dynamic(struct memseg *seg, pfn_t *startp)
{
	pfn_t pt_start;

	if ((seg->msegflags & MEMSEG_DYNAMIC) == 0)
		return (0);

	/* Meta data is required to be at the beginning */
	ASSERT(hat_getpfnum(kas.a_hat, (caddr_t)seg->epages) < seg->pages_base);

	pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages);
	if (startp != NULL)
		*startp = pt_start;

	return (1);
}
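
/*
 * Example (illustrative): a memseg created by adding pfns
 * [0x40000, 0x48000) with 0x100 pages of metadata carved off the front
 * has pages_base == 0x40100 while its page_t array sits at pfn 0x40000,
 * so memseg_is_dynamic() returns 1 with *startp == 0x40000.
 */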

int
kphysm_del_span(
	memhandle_t handle,
	pfn_t base,
	pgcnt_t npgs)
{
	struct mem_handle *mhp;
	struct memseg *seg;
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	pgcnt_t phys_pages, vm_pages;
	pfn_t p_end;
	page_t *pp;
	int ret;

	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	if (mhp->mh_state != MHND_INIT) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	}

	/*
	 * Intersect the span with the installed memory list (phys_install).
	 */
	mdsp_new = span_to_install(base, npgs);
	if (mdsp_new == NULL) {
		/*
		 * No physical memory in this range. Is this an
		 * error? If an attempt to start the delete is made
		 * for OK returns from del_span such as this, start will
		 * return an error.
		 * Could return KPHYSM_ENOWORK.
		 */
		/*
		 * It is assumed that there are no error returns
		 * from span_to_install() due to kmem_alloc failure.
		 */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_OK);
	}
	/*
	 * Does this span overlap an existing span?
	 */
	if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) {
		/*
		 * Differentiate between already on list for this handle
		 * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY).
		 */
		ret = KPHYSM_EBUSY;
		for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
			    base, npgs)) {
				ret = KPHYSM_EDUP;
				break;
			}
		}
		mutex_exit(&mhp->mh_mutex);
		free_delspans(mdsp_new);
		return (ret);
	}
	/*
	 * At this point the spans in mdsp_new have been inserted into the
	 * list of spans for this handle and thereby to the global list of
	 * spans being processed. Each of these spans must now be checked
	 * for relocatability. As a side-effect segments in the memseg list
	 * may be split.
	 *
	 * Note that mdsp_new can no longer be used as it is now part of
	 * a larger list. Select elements of this larger list based
	 * on base and npgs.
	 */
restart:
	phys_pages = 0;
	vm_pages = 0;
	ret = KPHYSM_OK;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		pgcnt_t pages_checked;

		if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) {
			continue;
		}
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		/*
		 * The pages_checked count is a hack. All pages should be
		 * checked for relocatability. Those not covered by memsegs
		 * should be tested with arch_kphysm_del_span_ok().
		 */
		pages_checked = 0;
		for (seg = memsegs; seg; seg = seg->next) {
			pfn_t mseg_start;

			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				continue;
			}
			/* Check that segment is suitable for delete. */
			if (memseg_is_dynamic(seg, &mseg_start)) {
				/*
				 * Can only delete whole added segments
				 * for the moment.
				 * Check that this is completely within the
				 * span.
				 */
				if (mseg_start < mdsp->mds_base ||
				    seg->pages_end > p_end) {
					ret = KPHYSM_EBUSY;
					break;
				}
				pages_checked += seg->pages_end - mseg_start;
			} else {
				/*
				 * Set mseg_start for accounting below.
				 */
				mseg_start = seg->pages_base;
				/*
				 * If this segment is larger than the span,
				 * try to split it. After the split, it
				 * is necessary to restart.
				 */
				if (seg->pages_base < mdsp->mds_base ||
				    seg->pages_end > p_end) {
					pfn_t abase;
					pgcnt_t anpgs;
					int s_ret;

					/* Split required.  */
					if (mdsp->mds_base < seg->pages_base)
						abase = seg->pages_base;
					else
						abase = mdsp->mds_base;
					if (p_end > seg->pages_end)
						anpgs = seg->pages_end - abase;
					else
						anpgs = p_end - abase;
					s_ret = kphysm_split_memseg(abase,
					    anpgs);
					if (s_ret == 0) {
						/* Split failed. */
						ret = KPHYSM_ERESOURCE;
						break;
					}
					goto restart;
				}
				pages_checked +=
				    seg->pages_end - seg->pages_base;
			}
			/*
			 * The memseg is wholly within the delete span.
			 * The individual pages can now be checked.
			 */
			/* Cage test. */
			for (pp = seg->pages; pp < seg->epages; pp++) {
				if (PP_ISNORELOC(pp)) {
					ret = KPHYSM_ENONRELOC;
					break;
				}
			}
			if (ret != KPHYSM_OK) {
				break;
			}
			phys_pages += (seg->pages_end - mseg_start);
			vm_pages += MSEG_NPAGES(seg);
		}
		if (ret != KPHYSM_OK)
			break;
		if (pages_checked != mdsp->mds_npgs) {
			ret = KPHYSM_ENONRELOC;
			break;
		}
	}

	if (ret == KPHYSM_OK) {
		mhp->mh_phys_pages += phys_pages;
		mhp->mh_vm_pages += vm_pages;
	} else {
		/*
		 * Keep holding the mh_mutex to prevent it going away.
		 */
		delspan_remove(&mhp->mh_transit, base, npgs);
	}
	mutex_exit(&mhp->mh_mutex);
	return (ret);
}
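
/*
 * Hypothetical end-to-end delete sketch. kphysm_del_start() belongs to
 * the same interface (declared in <sys/mem_config.h>) but lies outside
 * this section, and my_complete/my_arg are placeholders:
 *
 *	memhandle_t mh;
 *	int ret;
 *
 *	if ((ret = kphysm_del_gethandle(&mh)) != KPHYSM_OK)
 *		return (ret);
 *	if ((ret = kphysm_del_span(mh, base, npgs)) == KPHYSM_OK)
 *		ret = kphysm_del_start(mh, my_complete, my_arg);
 *	if (ret != KPHYSM_OK)
 *		(void) kphysm_del_release(mh);
 *	return (ret);
 */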

int
kphysm_del_span_query(
	pfn_t base,
	pgcnt_t npgs,
	memquery_t *mqp)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	int done_first_nonreloc;

	mqp->phys_pages = 0;
	mqp->managed = 0;
	mqp->nonrelocatable = 0;
	mqp->first_nonrelocatable = 0;
	mqp->last_nonrelocatable = 0;

	mdsp_new = span_to_install(base, npgs);
	/*
	 * It is OK to proceed here if mdsp_new == NULL.
	 */
	done_first_nonreloc = 0;
	for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) {
		pfn_t sbase;
		pgcnt_t snpgs;

		mqp->phys_pages += mdsp->mds_npgs;
		sbase = mdsp->mds_base;
		snpgs = mdsp->mds_npgs;
		while (snpgs != 0) {
			struct memseg *lseg, *seg;
			pfn_t p_end;
			page_t *pp;
			pfn_t mseg_start;

			p_end = sbase + snpgs;
			/*
			 * Find the lowest addressed memseg that starts
			 * after sbase and account for it.
			 * This is to catch dynamic memsegs whose start
			 * is hidden.
			 */
			seg = NULL;
			for (lseg = memsegs; lseg != NULL; lseg = lseg->next) {
				if ((lseg->pages_base >= sbase) ||
				    (lseg->pages_base < p_end &&
				    lseg->pages_end > sbase)) {
					if (seg == NULL ||
					    seg->pages_base > lseg->pages_base)
						seg = lseg;
				}
			}
			if (seg != NULL) {
				if (!memseg_is_dynamic(seg, &mseg_start)) {
					mseg_start = seg->pages_base;
				}
				/*
				 * Now have the full extent of the memseg so
				 * do the range check.
				 */
				if (mseg_start >= p_end ||
				    seg->pages_end <= sbase) {
					/* Span does not overlap memseg. */
					seg = NULL;
				}
			}
			/*
			 * Account for gap either before the segment if
			 * there is one or to the end of the span.
			 */
			if (seg == NULL || mseg_start > sbase) {
				pfn_t a_end;

				a_end = (seg == NULL) ? p_end : mseg_start;
p_end : mseg_start; 12510Sstevel@tonic-gate /* 12520Sstevel@tonic-gate * Check with arch layer for relocatability. 12530Sstevel@tonic-gate */ 12540Sstevel@tonic-gate if (arch_kphysm_del_span_ok(sbase, 12550Sstevel@tonic-gate (a_end - sbase))) { 12560Sstevel@tonic-gate /* 12570Sstevel@tonic-gate * No non-relocatable pages in this 12580Sstevel@tonic-gate * area, avoid the fine-grained 12590Sstevel@tonic-gate * test. 12600Sstevel@tonic-gate */ 12610Sstevel@tonic-gate snpgs -= (a_end - sbase); 12620Sstevel@tonic-gate sbase = a_end; 12630Sstevel@tonic-gate } 12640Sstevel@tonic-gate while (sbase < a_end) { 12650Sstevel@tonic-gate if (!arch_kphysm_del_span_ok(sbase, 12660Sstevel@tonic-gate 1)) { 12670Sstevel@tonic-gate mqp->nonrelocatable++; 12680Sstevel@tonic-gate if (!done_first_nonreloc) { 12690Sstevel@tonic-gate mqp-> 12700Sstevel@tonic-gate first_nonrelocatable 12710Sstevel@tonic-gate = sbase; 12720Sstevel@tonic-gate done_first_nonreloc = 1; 12730Sstevel@tonic-gate } 12740Sstevel@tonic-gate mqp->last_nonrelocatable = 12750Sstevel@tonic-gate sbase; 12760Sstevel@tonic-gate } 12770Sstevel@tonic-gate sbase++; 12780Sstevel@tonic-gate snpgs--; 12790Sstevel@tonic-gate } 12800Sstevel@tonic-gate } 12810Sstevel@tonic-gate if (seg != NULL) { 12820Sstevel@tonic-gate ASSERT(mseg_start <= sbase); 12830Sstevel@tonic-gate if (seg->pages_base != mseg_start && 12840Sstevel@tonic-gate seg->pages_base > sbase) { 12850Sstevel@tonic-gate pgcnt_t skip_pgs; 12860Sstevel@tonic-gate 12870Sstevel@tonic-gate /* 12880Sstevel@tonic-gate * Skip the page_t area of a 12890Sstevel@tonic-gate * dynamic memseg. 12900Sstevel@tonic-gate */ 12910Sstevel@tonic-gate skip_pgs = seg->pages_base - sbase; 12920Sstevel@tonic-gate if (snpgs <= skip_pgs) { 12930Sstevel@tonic-gate sbase += snpgs; 12940Sstevel@tonic-gate snpgs = 0; 12950Sstevel@tonic-gate continue; 12960Sstevel@tonic-gate } 12970Sstevel@tonic-gate snpgs -= skip_pgs; 12980Sstevel@tonic-gate sbase += skip_pgs; 12990Sstevel@tonic-gate } 13000Sstevel@tonic-gate ASSERT(snpgs != 0); 13010Sstevel@tonic-gate ASSERT(seg->pages_base <= sbase); 13020Sstevel@tonic-gate /* 13030Sstevel@tonic-gate * The individual pages can now be checked.
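 * Each page in the overlapping part of the memseg is counted in
 * mqp->managed, and any PP_ISNORELOC() page also updates the
 * non-relocatable counters and the first/last markers.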
13040Sstevel@tonic-gate */ 13050Sstevel@tonic-gate for (pp = seg->pages + 13060Sstevel@tonic-gate (sbase - seg->pages_base); 13070Sstevel@tonic-gate snpgs != 0 && pp < seg->epages; pp++) { 13080Sstevel@tonic-gate mqp->managed++; 13090Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 13100Sstevel@tonic-gate mqp->nonrelocatable++; 13110Sstevel@tonic-gate if (!done_first_nonreloc) { 13120Sstevel@tonic-gate mqp-> 13130Sstevel@tonic-gate first_nonrelocatable 13140Sstevel@tonic-gate = sbase; 13150Sstevel@tonic-gate done_first_nonreloc = 1; 13160Sstevel@tonic-gate } 13170Sstevel@tonic-gate mqp->last_nonrelocatable = 13180Sstevel@tonic-gate sbase; 13190Sstevel@tonic-gate } 13200Sstevel@tonic-gate sbase++; 13210Sstevel@tonic-gate snpgs--; 13220Sstevel@tonic-gate } 13230Sstevel@tonic-gate } 13240Sstevel@tonic-gate } 13250Sstevel@tonic-gate } 13260Sstevel@tonic-gate 13270Sstevel@tonic-gate free_delspans(mdsp_new); 13280Sstevel@tonic-gate 13290Sstevel@tonic-gate return (KPHYSM_OK); 13300Sstevel@tonic-gate } 13310Sstevel@tonic-gate 13320Sstevel@tonic-gate /* 13330Sstevel@tonic-gate * This release function can be called at any stage as follows: 13340Sstevel@tonic-gate * _gethandle only called 13350Sstevel@tonic-gate * _span(s) only called 13360Sstevel@tonic-gate * _start called but failed 13370Sstevel@tonic-gate * delete thread exited 13380Sstevel@tonic-gate */ 13390Sstevel@tonic-gate int 13400Sstevel@tonic-gate kphysm_del_release(memhandle_t handle) 13410Sstevel@tonic-gate { 13420Sstevel@tonic-gate struct mem_handle *mhp; 13430Sstevel@tonic-gate 13440Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 13450Sstevel@tonic-gate if (mhp == NULL) { 13460Sstevel@tonic-gate return (KPHYSM_EHANDLE); 13470Sstevel@tonic-gate } 13480Sstevel@tonic-gate switch (mhp->mh_state) { 13490Sstevel@tonic-gate case MHND_STARTING: 13500Sstevel@tonic-gate case MHND_RUNNING: 13510Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13520Sstevel@tonic-gate return (KPHYSM_ENOTFINISHED); 13530Sstevel@tonic-gate case MHND_FREE: 13540Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 13550Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13560Sstevel@tonic-gate return (KPHYSM_EHANDLE); 13570Sstevel@tonic-gate case MHND_INIT: 13580Sstevel@tonic-gate break; 13590Sstevel@tonic-gate case MHND_DONE: 13600Sstevel@tonic-gate break; 13610Sstevel@tonic-gate case MHND_RELEASE: 13620Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13630Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 13640Sstevel@tonic-gate default: 13650Sstevel@tonic-gate #ifdef DEBUG 13660Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d", 13670Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 13680Sstevel@tonic-gate #endif /* DEBUG */ 13690Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13700Sstevel@tonic-gate return (KPHYSM_EHANDLE); 13710Sstevel@tonic-gate } 13720Sstevel@tonic-gate /* 13730Sstevel@tonic-gate * Set state so that we can wait if necessary. 13740Sstevel@tonic-gate * Also this means that we have read/write access to all 13750Sstevel@tonic-gate * fields except mh_exthandle and mh_state. 13760Sstevel@tonic-gate */ 13770Sstevel@tonic-gate mhp->mh_state = MHND_RELEASE; 13780Sstevel@tonic-gate /* 13790Sstevel@tonic-gate * The mem_handle cannot be de-allocated by any other operation 13800Sstevel@tonic-gate * now, so no need to hold mh_mutex. 
13810Sstevel@tonic-gate */ 13820Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13830Sstevel@tonic-gate 13840Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, 0, 0); 13850Sstevel@tonic-gate mhp->mh_phys_pages = 0; 13860Sstevel@tonic-gate mhp->mh_vm_pages = 0; 13870Sstevel@tonic-gate mhp->mh_hold_todo = 0; 13880Sstevel@tonic-gate mhp->mh_delete_complete = NULL; 13890Sstevel@tonic-gate mhp->mh_delete_complete_arg = NULL; 13900Sstevel@tonic-gate mhp->mh_cancel = 0; 13910Sstevel@tonic-gate 13920Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 13930Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_RELEASE); 13940Sstevel@tonic-gate mhp->mh_state = MHND_FREE; 13950Sstevel@tonic-gate 13960Sstevel@tonic-gate kphysm_free_mem_handle(mhp); 13970Sstevel@tonic-gate 13980Sstevel@tonic-gate return (KPHYSM_OK); 13990Sstevel@tonic-gate } 14000Sstevel@tonic-gate 14010Sstevel@tonic-gate /* 14020Sstevel@tonic-gate * This cancel function can only be called with the thread running. 14030Sstevel@tonic-gate */ 14040Sstevel@tonic-gate int 14050Sstevel@tonic-gate kphysm_del_cancel(memhandle_t handle) 14060Sstevel@tonic-gate { 14070Sstevel@tonic-gate struct mem_handle *mhp; 14080Sstevel@tonic-gate 14090Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 14100Sstevel@tonic-gate if (mhp == NULL) { 14110Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14120Sstevel@tonic-gate } 14130Sstevel@tonic-gate if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) { 14140Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14150Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING); 14160Sstevel@tonic-gate } 14170Sstevel@tonic-gate /* 14180Sstevel@tonic-gate * Set the cancel flag and wake the delete thread up. 14190Sstevel@tonic-gate * The thread may be waiting on I/O, so the effect of the cancel 14200Sstevel@tonic-gate * may be delayed. 14210Sstevel@tonic-gate */ 14220Sstevel@tonic-gate if (mhp->mh_cancel == 0) { 14230Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ECANCELLED; 14240Sstevel@tonic-gate cv_signal(&mhp->mh_cv); 14250Sstevel@tonic-gate } 14260Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14270Sstevel@tonic-gate return (KPHYSM_OK); 14280Sstevel@tonic-gate } 14290Sstevel@tonic-gate 14300Sstevel@tonic-gate int 14310Sstevel@tonic-gate kphysm_del_status( 14320Sstevel@tonic-gate memhandle_t handle, 14330Sstevel@tonic-gate memdelstat_t *mdstp) 14340Sstevel@tonic-gate { 14350Sstevel@tonic-gate struct mem_handle *mhp; 14360Sstevel@tonic-gate 14370Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 14380Sstevel@tonic-gate if (mhp == NULL) { 14390Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14400Sstevel@tonic-gate } 14410Sstevel@tonic-gate /* 14420Sstevel@tonic-gate * Calling kphysm_del_status() is allowed before the delete 14430Sstevel@tonic-gate * is started to allow for status display. 
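 *
 * Illustrative caller sketch (hypothetical, not part of this file;
 * assumes a handle previously obtained via kphysm_del_gethandle()):
 *
 *	memdelstat_t st;
 *
 *	if (kphysm_del_status(handle, &st) == KPHYSM_OK)
 *		cmn_err(CE_CONT, "?collected %lu of %lu managed pages\n",
 *		    st.collected, st.managed);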
14440Sstevel@tonic-gate */ 14450Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING && 14460Sstevel@tonic-gate mhp->mh_state != MHND_RUNNING) { 14470Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14480Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING); 14490Sstevel@tonic-gate } 14500Sstevel@tonic-gate mdstp->phys_pages = mhp->mh_phys_pages; 14510Sstevel@tonic-gate mdstp->managed = mhp->mh_vm_pages; 14520Sstevel@tonic-gate mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo; 14530Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14540Sstevel@tonic-gate return (KPHYSM_OK); 14550Sstevel@tonic-gate } 14560Sstevel@tonic-gate 14570Sstevel@tonic-gate static int mem_delete_additional_pages = 100; 14580Sstevel@tonic-gate 14590Sstevel@tonic-gate static int 14600Sstevel@tonic-gate can_remove_pgs(pgcnt_t npgs) 14610Sstevel@tonic-gate { 14620Sstevel@tonic-gate /* 14630Sstevel@tonic-gate * If all pageable pages were paged out, freemem would 14640Sstevel@tonic-gate * equal availrmem. There is a minimum requirement for 14650Sstevel@tonic-gate * availrmem. 14660Sstevel@tonic-gate */ 14670Sstevel@tonic-gate if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages)) 14680Sstevel@tonic-gate < npgs) 14690Sstevel@tonic-gate return (0); 14700Sstevel@tonic-gate /* TODO: check swap space, etc. */ 14710Sstevel@tonic-gate return (1); 14720Sstevel@tonic-gate } 14730Sstevel@tonic-gate 14740Sstevel@tonic-gate static int 14750Sstevel@tonic-gate get_availrmem(pgcnt_t npgs) 14760Sstevel@tonic-gate { 14770Sstevel@tonic-gate int ret; 14780Sstevel@tonic-gate 14790Sstevel@tonic-gate mutex_enter(&freemem_lock); 14800Sstevel@tonic-gate ret = can_remove_pgs(npgs); 14810Sstevel@tonic-gate if (ret != 0) 14820Sstevel@tonic-gate availrmem -= npgs; 14830Sstevel@tonic-gate mutex_exit(&freemem_lock); 14840Sstevel@tonic-gate return (ret); 14850Sstevel@tonic-gate } 14860Sstevel@tonic-gate 14870Sstevel@tonic-gate static void 14880Sstevel@tonic-gate put_availrmem(pgcnt_t npgs) 14890Sstevel@tonic-gate { 14900Sstevel@tonic-gate mutex_enter(&freemem_lock); 14910Sstevel@tonic-gate availrmem += npgs; 14920Sstevel@tonic-gate mutex_exit(&freemem_lock); 14930Sstevel@tonic-gate } 14940Sstevel@tonic-gate 14950Sstevel@tonic-gate #define FREEMEM_INCR 100 14960Sstevel@tonic-gate static pgcnt_t freemem_incr = FREEMEM_INCR; 14970Sstevel@tonic-gate #define DEL_FREE_WAIT_FRAC 4 14980Sstevel@tonic-gate #define DEL_FREE_WAIT_TICKS ((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC) 14990Sstevel@tonic-gate 15000Sstevel@tonic-gate #define DEL_BUSY_WAIT_FRAC 20 15010Sstevel@tonic-gate #define DEL_BUSY_WAIT_TICKS ((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC) 15020Sstevel@tonic-gate 15030Sstevel@tonic-gate static void kphysm_del_cleanup(struct mem_handle *); 15040Sstevel@tonic-gate 15050Sstevel@tonic-gate static void page_delete_collect(page_t *, struct mem_handle *); 15060Sstevel@tonic-gate 15070Sstevel@tonic-gate static pgcnt_t 15080Sstevel@tonic-gate delthr_get_freemem(struct mem_handle *mhp) 15090Sstevel@tonic-gate { 15100Sstevel@tonic-gate pgcnt_t free_get; 15110Sstevel@tonic-gate int ret; 15120Sstevel@tonic-gate 15130Sstevel@tonic-gate ASSERT(MUTEX_HELD(&mhp->mh_mutex)); 15140Sstevel@tonic-gate 15150Sstevel@tonic-gate MDSTAT_INCR(mhp, need_free); 15160Sstevel@tonic-gate /* 15170Sstevel@tonic-gate * Get up to freemem_incr pages. 
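 * (freemem_incr defaults to FREEMEM_INCR, i.e. 100 pages; the request
 * is capped below at mh_hold_todo, the number of pages still to be
 * collected.)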
15180Sstevel@tonic-gate */ 15190Sstevel@tonic-gate free_get = freemem_incr; 15200Sstevel@tonic-gate if (free_get > mhp->mh_hold_todo) 15210Sstevel@tonic-gate free_get = mhp->mh_hold_todo; 15220Sstevel@tonic-gate /* 15230Sstevel@tonic-gate * Take free_get pages away from freemem, 15240Sstevel@tonic-gate * waiting if necessary. 15250Sstevel@tonic-gate */ 15260Sstevel@tonic-gate 15270Sstevel@tonic-gate while (!mhp->mh_cancel) { 15280Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15290Sstevel@tonic-gate MDSTAT_INCR(mhp, free_loop); 15300Sstevel@tonic-gate /* 15310Sstevel@tonic-gate * Duplicate test from page_create_throttle() 15320Sstevel@tonic-gate * but don't override with !PG_WAIT. 15330Sstevel@tonic-gate */ 15340Sstevel@tonic-gate if (freemem < (free_get + throttlefree)) { 15350Sstevel@tonic-gate MDSTAT_INCR(mhp, free_low); 15360Sstevel@tonic-gate ret = 0; 15370Sstevel@tonic-gate } else { 15380Sstevel@tonic-gate ret = page_create_wait(free_get, 0); 15390Sstevel@tonic-gate if (ret == 0) { 15400Sstevel@tonic-gate /* EMPTY */ 15410Sstevel@tonic-gate MDSTAT_INCR(mhp, free_failed); 15420Sstevel@tonic-gate } 15430Sstevel@tonic-gate } 15440Sstevel@tonic-gate if (ret != 0) { 15450Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 15460Sstevel@tonic-gate return (free_get); 15470Sstevel@tonic-gate } 15480Sstevel@tonic-gate 15490Sstevel@tonic-gate /* 15500Sstevel@tonic-gate * Put pressure on pageout. 15510Sstevel@tonic-gate */ 15520Sstevel@tonic-gate page_needfree(free_get); 15530Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 15540Sstevel@tonic-gate 15550Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 15560Sstevel@tonic-gate (void) cv_timedwait(&mhp->mh_cv, &mhp->mh_mutex, 15570Sstevel@tonic-gate (lbolt + DEL_FREE_WAIT_TICKS)); 15580Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15590Sstevel@tonic-gate page_needfree(-(spgcnt_t)free_get); 15600Sstevel@tonic-gate 15610Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 15620Sstevel@tonic-gate } 15630Sstevel@tonic-gate return (0); 15640Sstevel@tonic-gate } 15650Sstevel@tonic-gate 15660Sstevel@tonic-gate #define DR_AIO_CLEANUP_DELAY 25000 /* 0.025secs, in usec */ 15670Sstevel@tonic-gate #define DR_AIO_CLEANUP_MAXLOOPS_NODELAY 100 15680Sstevel@tonic-gate /* 15690Sstevel@tonic-gate * This function is run as a helper thread for delete_memory_thread. 15700Sstevel@tonic-gate * It is needed in order to force kaio cleanup, so that pages used in kaio 15710Sstevel@tonic-gate * will be unlocked and subsequently relocated by delete_memory_thread. 15720Sstevel@tonic-gate * The address of the delete_memory_thread's mem_handle is passed in to 15730Sstevel@tonic-gate * this thread function, and is used to set the mh_aio_cleanup_done member 15740Sstevel@tonic-gate * prior to calling thread_exit.
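 * The thread repeatedly walks practive under pidlock, invoking kaio's
 * aio_cleanup_dr_delete_memory() for each process with outstanding
 * kaio, until mh_dr_aio_cleanup_cancel is set by delete_memory_thread.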
15750Sstevel@tonic-gate */ 15760Sstevel@tonic-gate static void 15770Sstevel@tonic-gate dr_aio_cleanup_thread(caddr_t amhp) 15780Sstevel@tonic-gate { 15790Sstevel@tonic-gate proc_t *procp; 15800Sstevel@tonic-gate int (*aio_cleanup_dr_delete_memory)(proc_t *); 15810Sstevel@tonic-gate int cleaned; 15820Sstevel@tonic-gate int n = 0; 15830Sstevel@tonic-gate struct mem_handle *mhp; 15840Sstevel@tonic-gate volatile uint_t *pcancel; 15850Sstevel@tonic-gate 15860Sstevel@tonic-gate mhp = (struct mem_handle *)amhp; 15870Sstevel@tonic-gate ASSERT(mhp != NULL); 15880Sstevel@tonic-gate pcancel = &mhp->mh_dr_aio_cleanup_cancel; 15890Sstevel@tonic-gate if (modload("sys", "kaio") == -1) { 15900Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 15910Sstevel@tonic-gate cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio"); 15920Sstevel@tonic-gate thread_exit(); 15930Sstevel@tonic-gate } 15940Sstevel@tonic-gate aio_cleanup_dr_delete_memory = (int (*)(proc_t *)) 15950Sstevel@tonic-gate modgetsymvalue("aio_cleanup_dr_delete_memory", 0); 15960Sstevel@tonic-gate if (aio_cleanup_dr_delete_memory == NULL) { 15970Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 15980Sstevel@tonic-gate cmn_err(CE_WARN, 15990Sstevel@tonic-gate "aio_cleanup_dr_delete_memory not found in kaio"); 16000Sstevel@tonic-gate thread_exit(); 16010Sstevel@tonic-gate } 16020Sstevel@tonic-gate do { 16030Sstevel@tonic-gate cleaned = 0; 16040Sstevel@tonic-gate mutex_enter(&pidlock); 16050Sstevel@tonic-gate for (procp = practive; (*pcancel == 0) && (procp != NULL); 16060Sstevel@tonic-gate procp = procp->p_next) { 16070Sstevel@tonic-gate mutex_enter(&procp->p_lock); 16080Sstevel@tonic-gate if (procp->p_aio != NULL) { 16090Sstevel@tonic-gate /* cleanup proc's outstanding kaio */ 16100Sstevel@tonic-gate cleaned += 16110Sstevel@tonic-gate (*aio_cleanup_dr_delete_memory)(procp); 16120Sstevel@tonic-gate } 16130Sstevel@tonic-gate mutex_exit(&procp->p_lock); 16140Sstevel@tonic-gate } 16150Sstevel@tonic-gate mutex_exit(&pidlock); 16160Sstevel@tonic-gate if ((*pcancel == 0) && 16170Sstevel@tonic-gate (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) { 16180Sstevel@tonic-gate /* delay a bit before retrying all procs again */ 16190Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY)); 16200Sstevel@tonic-gate n = 0; 16210Sstevel@tonic-gate } 16220Sstevel@tonic-gate } while (*pcancel == 0); 16230Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 16240Sstevel@tonic-gate thread_exit(); 16250Sstevel@tonic-gate } 16260Sstevel@tonic-gate 16270Sstevel@tonic-gate static void 16280Sstevel@tonic-gate delete_memory_thread(caddr_t amhp) 16290Sstevel@tonic-gate { 16300Sstevel@tonic-gate struct mem_handle *mhp; 16310Sstevel@tonic-gate struct memdelspan *mdsp; 16320Sstevel@tonic-gate callb_cpr_t cprinfo; 16330Sstevel@tonic-gate page_t *pp_targ; 16340Sstevel@tonic-gate spgcnt_t freemem_left; 16350Sstevel@tonic-gate void (*del_complete_funcp)(void *, int error); 16360Sstevel@tonic-gate void *del_complete_arg; 16370Sstevel@tonic-gate int comp_code; 16380Sstevel@tonic-gate int ret; 16390Sstevel@tonic-gate int first_scan; 16400Sstevel@tonic-gate uint_t szc; 16410Sstevel@tonic-gate #ifdef MEM_DEL_STATS 16420Sstevel@tonic-gate uint64_t start_total, ntick_total; 16430Sstevel@tonic-gate uint64_t start_pgrp, ntick_pgrp; 16440Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 16450Sstevel@tonic-gate 16460Sstevel@tonic-gate mhp = (struct mem_handle *)amhp; 16470Sstevel@tonic-gate 16480Sstevel@tonic-gate #ifdef MEM_DEL_STATS 16490Sstevel@tonic-gate start_total = ddi_get_lbolt(); 
16500Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 16510Sstevel@tonic-gate 16520Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex, 16530Sstevel@tonic-gate callb_generic_cpr, "memdel"); 16540Sstevel@tonic-gate 16550Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16560Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_STARTING); 16570Sstevel@tonic-gate 16580Sstevel@tonic-gate mhp->mh_state = MHND_RUNNING; 16590Sstevel@tonic-gate mhp->mh_thread_id = curthread; 16600Sstevel@tonic-gate 16610Sstevel@tonic-gate mhp->mh_hold_todo = mhp->mh_vm_pages; 16620Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 16630Sstevel@tonic-gate 16640Sstevel@tonic-gate /* Allocate the remap pages now, if necessary. */ 16650Sstevel@tonic-gate memseg_remap_init(); 16660Sstevel@tonic-gate 16670Sstevel@tonic-gate /* 16680Sstevel@tonic-gate * Subtract from availrmem now if possible, as enough availrmem 16690Sstevel@tonic-gate * may no longer be available by the end of the delete. 16700Sstevel@tonic-gate */ 16710Sstevel@tonic-gate if (!get_availrmem(mhp->mh_vm_pages)) { 16720Sstevel@tonic-gate comp_code = KPHYSM_ENOTVIABLE; 16730Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16740Sstevel@tonic-gate goto early_exit; 16750Sstevel@tonic-gate } 16760Sstevel@tonic-gate 16770Sstevel@tonic-gate ret = kphysm_setup_pre_del(mhp->mh_vm_pages); 16780Sstevel@tonic-gate 16790Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16800Sstevel@tonic-gate 16810Sstevel@tonic-gate if (ret != 0) { 16820Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_EREFUSED; 16830Sstevel@tonic-gate goto refused; 16840Sstevel@tonic-gate } 16850Sstevel@tonic-gate 16860Sstevel@tonic-gate transit_list_collect(mhp, 1); 16870Sstevel@tonic-gate 16880Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 16890Sstevel@tonic-gate mdsp = mdsp->mds_next) { 16900Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap == NULL); 16910Sstevel@tonic-gate mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP); 16920Sstevel@tonic-gate mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp), 16930Sstevel@tonic-gate KM_SLEEP); 16940Sstevel@tonic-gate } 16950Sstevel@tonic-gate 16960Sstevel@tonic-gate first_scan = 1; 16970Sstevel@tonic-gate freemem_left = 0; 16980Sstevel@tonic-gate /* 16990Sstevel@tonic-gate * Start dr_aio_cleanup_thread, which periodically iterates 17000Sstevel@tonic-gate * through the process list and invokes aio cleanup. This 17010Sstevel@tonic-gate * is needed in order to avoid a deadly embrace between the 17020Sstevel@tonic-gate * delete_memory_thread (waiting on writer lock for page, with the 17030Sstevel@tonic-gate * exclusive-wanted bit set), kaio read request threads (waiting for a 17040Sstevel@tonic-gate * reader lock on the same page that is wanted by the 17050Sstevel@tonic-gate * delete_memory_thread), and threads waiting for kaio completion 17060Sstevel@tonic-gate * (blocked on spt_amp->lock).
17070Sstevel@tonic-gate */ 17080Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 0; 17090Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 0; 17100Sstevel@tonic-gate (void) thread_create(NULL, 0, dr_aio_cleanup_thread, 17110Sstevel@tonic-gate (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1); 17120Sstevel@tonic-gate while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) { 17130Sstevel@tonic-gate pgcnt_t collected; 17140Sstevel@tonic-gate 17150Sstevel@tonic-gate MDSTAT_INCR(mhp, nloop); 17160Sstevel@tonic-gate collected = 0; 17170Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) && 17180Sstevel@tonic-gate (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) { 17190Sstevel@tonic-gate pfn_t pfn, p_end; 17200Sstevel@tonic-gate 17210Sstevel@tonic-gate if (first_scan) { 17220Sstevel@tonic-gate mem_node_pre_del_slice(mdsp->mds_base, 17230Sstevel@tonic-gate mdsp->mds_base + mdsp->mds_npgs - 1); 17240Sstevel@tonic-gate } 17250Sstevel@tonic-gate 17260Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 17270Sstevel@tonic-gate for (pfn = mdsp->mds_base; (pfn < p_end) && 17280Sstevel@tonic-gate (mhp->mh_cancel == 0); pfn++) { 17290Sstevel@tonic-gate page_t *pp, *tpp, *tpp_targ; 17300Sstevel@tonic-gate pgcnt_t bit; 17310Sstevel@tonic-gate struct vnode *vp; 17320Sstevel@tonic-gate u_offset_t offset; 17330Sstevel@tonic-gate int mod, result; 17340Sstevel@tonic-gate spgcnt_t pgcnt; 17350Sstevel@tonic-gate 17360Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 17370Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] & 17380Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) { 17390Sstevel@tonic-gate MDSTAT_INCR(mhp, already_done); 17400Sstevel@tonic-gate continue; 17410Sstevel@tonic-gate } 17420Sstevel@tonic-gate if (freemem_left == 0) { 17430Sstevel@tonic-gate freemem_left += delthr_get_freemem(mhp); 17440Sstevel@tonic-gate if (freemem_left == 0) 17450Sstevel@tonic-gate break; 17460Sstevel@tonic-gate } 17470Sstevel@tonic-gate 17480Sstevel@tonic-gate /* 17490Sstevel@tonic-gate * Release mh_mutex - some of this 17500Sstevel@tonic-gate * stuff takes some time (eg PUTPAGE). 17510Sstevel@tonic-gate */ 17520Sstevel@tonic-gate 17530Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 17540Sstevel@tonic-gate MDSTAT_INCR(mhp, ncheck); 17550Sstevel@tonic-gate 17560Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 17570Sstevel@tonic-gate if (pp == NULL) { 17580Sstevel@tonic-gate /* 17590Sstevel@tonic-gate * Not covered by a page_t - will 17600Sstevel@tonic-gate * be dealt with elsewhere. 17610Sstevel@tonic-gate */ 17620Sstevel@tonic-gate MDSTAT_INCR(mhp, nopaget); 17630Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17640Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 17650Sstevel@tonic-gate (1 << (bit % NBPBMW)); 17660Sstevel@tonic-gate continue; 17670Sstevel@tonic-gate } 17680Sstevel@tonic-gate 17690Sstevel@tonic-gate if (!page_try_reclaim_lock(pp, SE_EXCL, 1770917Selowe SE_EXCL_WANTED | SE_RETIRED)) { 1771917Selowe /* 1772917Selowe * Page in use elsewhere. Skip it. 1773917Selowe */ 1774917Selowe MDSTAT_INCR(mhp, lockfail); 1775917Selowe mutex_enter(&mhp->mh_mutex); 1776917Selowe continue; 17770Sstevel@tonic-gate } 17780Sstevel@tonic-gate /* 17790Sstevel@tonic-gate * See if the cage expanded into the delete. 17800Sstevel@tonic-gate * This can happen as we have to allow the 17810Sstevel@tonic-gate * cage to expand. 
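 * If a cage page is found here, the delete cannot proceed and is
 * cancelled below with KPHYSM_ENONRELOC.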
17820Sstevel@tonic-gate */ 17830Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 1784917Selowe page_unlock(pp); 17850Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17860Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ENONRELOC; 17870Sstevel@tonic-gate break; 17880Sstevel@tonic-gate } 1789917Selowe if (PP_RETIRED(pp)) { 17900Sstevel@tonic-gate /* 17910Sstevel@tonic-gate * Page has been retired and is 17920Sstevel@tonic-gate * not part of the cage so we 17930Sstevel@tonic-gate * can now do the accounting for 17940Sstevel@tonic-gate * it. 17950Sstevel@tonic-gate */ 17960Sstevel@tonic-gate MDSTAT_INCR(mhp, retired); 17970Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17980Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] 17990Sstevel@tonic-gate |= (1 << (bit % NBPBMW)); 18000Sstevel@tonic-gate mdsp->mds_bitmap_retired[bit / 18010Sstevel@tonic-gate NBPBMW] |= 18020Sstevel@tonic-gate (1 << (bit % NBPBMW)); 18030Sstevel@tonic-gate mhp->mh_hold_todo--; 18040Sstevel@tonic-gate continue; 18050Sstevel@tonic-gate } 18060Sstevel@tonic-gate ASSERT(freemem_left != 0); 18070Sstevel@tonic-gate if (PP_ISFREE(pp)) { 18080Sstevel@tonic-gate /* 18090Sstevel@tonic-gate * Like page_reclaim() only 'freemem' 18100Sstevel@tonic-gate * processing is already done. 18110Sstevel@tonic-gate */ 18120Sstevel@tonic-gate MDSTAT_INCR(mhp, nfree); 18130Sstevel@tonic-gate free_page_collect: 18140Sstevel@tonic-gate if (PP_ISAGED(pp)) { 18150Sstevel@tonic-gate page_list_sub(pp, 18160Sstevel@tonic-gate PG_FREE_LIST); 18170Sstevel@tonic-gate } else { 18180Sstevel@tonic-gate page_list_sub(pp, 18190Sstevel@tonic-gate PG_CACHE_LIST); 18200Sstevel@tonic-gate } 18210Sstevel@tonic-gate PP_CLRFREE(pp); 18220Sstevel@tonic-gate PP_CLRAGED(pp); 18230Sstevel@tonic-gate collected++; 18240Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18250Sstevel@tonic-gate page_delete_collect(pp, mhp); 18260Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 18270Sstevel@tonic-gate (1 << (bit % NBPBMW)); 18280Sstevel@tonic-gate freemem_left--; 18290Sstevel@tonic-gate continue; 18300Sstevel@tonic-gate } 18310Sstevel@tonic-gate ASSERT(pp->p_vnode != NULL); 18320Sstevel@tonic-gate if (first_scan) { 18330Sstevel@tonic-gate MDSTAT_INCR(mhp, first_notfree); 18340Sstevel@tonic-gate page_unlock(pp); 18350Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18360Sstevel@tonic-gate continue; 18370Sstevel@tonic-gate } 18380Sstevel@tonic-gate /* 18390Sstevel@tonic-gate * Keep stats on pages encountered that 1840917Selowe * are marked for retirement. 18410Sstevel@tonic-gate */ 1842917Selowe if (PP_TOXIC(pp)) { 18430Sstevel@tonic-gate MDSTAT_INCR(mhp, toxic); 1844917Selowe } else if (PP_PR_REQ(pp)) { 18450Sstevel@tonic-gate MDSTAT_INCR(mhp, failing); 18460Sstevel@tonic-gate } 18470Sstevel@tonic-gate /* 18480Sstevel@tonic-gate * In certain cases below, special exceptions 18490Sstevel@tonic-gate * are made for pages that are toxic. This 18500Sstevel@tonic-gate * is because the current meaning of toxic 18510Sstevel@tonic-gate * is that an uncorrectable error has been 18520Sstevel@tonic-gate * previously associated with the page. 18530Sstevel@tonic-gate */ 18540Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 1855917Selowe if (!PP_TOXIC(pp)) { 18560Sstevel@tonic-gate /* 18570Sstevel@tonic-gate * Must relocate locked in 18580Sstevel@tonic-gate * memory pages. 
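 * (A nonzero p_lckcnt or p_cowcnt means the page cannot simply be
 * paged out, so relocation is the only way to vacate it.)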
18590Sstevel@tonic-gate */ 18600Sstevel@tonic-gate #ifdef MEM_DEL_STATS 18610Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 18620Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 18630Sstevel@tonic-gate /* 18640Sstevel@tonic-gate * Lock all constituent pages 18650Sstevel@tonic-gate * of a large page to ensure 18660Sstevel@tonic-gate * that p_szc won't change. 18670Sstevel@tonic-gate */ 18680Sstevel@tonic-gate if (!group_page_trylock(pp, 18690Sstevel@tonic-gate SE_EXCL)) { 18700Sstevel@tonic-gate MDSTAT_INCR(mhp, 18710Sstevel@tonic-gate gptllckfail); 18720Sstevel@tonic-gate page_unlock(pp); 18730Sstevel@tonic-gate mutex_enter( 18740Sstevel@tonic-gate &mhp->mh_mutex); 18750Sstevel@tonic-gate continue; 18760Sstevel@tonic-gate } 18770Sstevel@tonic-gate MDSTAT_INCR(mhp, npplocked); 18780Sstevel@tonic-gate pp_targ = 18790Sstevel@tonic-gate page_get_replacement_page( 18800Sstevel@tonic-gate pp, NULL, 0); 18810Sstevel@tonic-gate if (pp_targ != NULL) { 18820Sstevel@tonic-gate #ifdef MEM_DEL_STATS 18830Sstevel@tonic-gate ntick_pgrp = 18840Sstevel@tonic-gate (uint64_t) 18850Sstevel@tonic-gate ddi_get_lbolt() - 18860Sstevel@tonic-gate start_pgrp; 18870Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 18880Sstevel@tonic-gate MDSTAT_PGRP(mhp, 18890Sstevel@tonic-gate ntick_pgrp); 18900Sstevel@tonic-gate MDSTAT_INCR(mhp, 18910Sstevel@tonic-gate nlockreloc); 18920Sstevel@tonic-gate goto reloc; 18930Sstevel@tonic-gate } 18940Sstevel@tonic-gate group_page_unlock(pp); 18950Sstevel@tonic-gate page_unlock(pp); 18960Sstevel@tonic-gate #ifdef MEM_DEL_STATS 18970Sstevel@tonic-gate ntick_pgrp = 18980Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() - 18990Sstevel@tonic-gate start_pgrp; 19000Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19010Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19020Sstevel@tonic-gate MDSTAT_INCR(mhp, nnorepl); 19030Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19040Sstevel@tonic-gate continue; 19050Sstevel@tonic-gate } else { 19060Sstevel@tonic-gate /* 19070Sstevel@tonic-gate * Cannot do anything about 19080Sstevel@tonic-gate * this page because it is 19090Sstevel@tonic-gate * toxic. 19100Sstevel@tonic-gate */ 19110Sstevel@tonic-gate MDSTAT_INCR(mhp, npplkdtoxic); 19120Sstevel@tonic-gate page_unlock(pp); 19130Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19140Sstevel@tonic-gate continue; 19150Sstevel@tonic-gate } 19160Sstevel@tonic-gate } 19170Sstevel@tonic-gate /* 19180Sstevel@tonic-gate * Unload the mappings and check if mod bit 19190Sstevel@tonic-gate * is set. 19200Sstevel@tonic-gate */ 19213290Sjohansen ASSERT(!PP_ISKAS(pp)); 19220Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 19230Sstevel@tonic-gate mod = hat_ismod(pp); 19240Sstevel@tonic-gate 19250Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19260Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 19270Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 1928917Selowe if (mod && !PP_TOXIC(pp)) { 19290Sstevel@tonic-gate /* 19300Sstevel@tonic-gate * Lock all constituent pages 19310Sstevel@tonic-gate * of a large page to ensure 19320Sstevel@tonic-gate * that p_szc won't change. 
19330Sstevel@tonic-gate */ 19340Sstevel@tonic-gate if (!group_page_trylock(pp, SE_EXCL)) { 19350Sstevel@tonic-gate MDSTAT_INCR(mhp, gptlmodfail); 19360Sstevel@tonic-gate page_unlock(pp); 19370Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19380Sstevel@tonic-gate continue; 19390Sstevel@tonic-gate } 19400Sstevel@tonic-gate pp_targ = page_get_replacement_page(pp, 19410Sstevel@tonic-gate NULL, 0); 19420Sstevel@tonic-gate if (pp_targ != NULL) { 19430Sstevel@tonic-gate MDSTAT_INCR(mhp, nmodreloc); 19440Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19450Sstevel@tonic-gate ntick_pgrp = 19460Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() - 19470Sstevel@tonic-gate start_pgrp; 19480Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19490Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19500Sstevel@tonic-gate goto reloc; 19510Sstevel@tonic-gate } 19520Sstevel@tonic-gate group_page_unlock(pp); 19530Sstevel@tonic-gate } 19540Sstevel@tonic-gate 19550Sstevel@tonic-gate if (!page_try_demote_pages(pp)) { 19560Sstevel@tonic-gate MDSTAT_INCR(mhp, demotefail); 19570Sstevel@tonic-gate page_unlock(pp); 19580Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19590Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 19600Sstevel@tonic-gate start_pgrp; 19610Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19620Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19630Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19640Sstevel@tonic-gate continue; 19650Sstevel@tonic-gate } 19660Sstevel@tonic-gate 19670Sstevel@tonic-gate /* 19680Sstevel@tonic-gate * Regular 'page-out'. 19690Sstevel@tonic-gate */ 19700Sstevel@tonic-gate if (!mod) { 19710Sstevel@tonic-gate MDSTAT_INCR(mhp, ndestroy); 19720Sstevel@tonic-gate page_destroy(pp, 1); 19730Sstevel@tonic-gate /* 19740Sstevel@tonic-gate * page_destroy was called with 19750Sstevel@tonic-gate * dontfree. As long as p_lckcnt 19760Sstevel@tonic-gate * and p_cowcnt are both zero, the 19770Sstevel@tonic-gate * only additional action of 19780Sstevel@tonic-gate * page_destroy with !dontfree is to 19790Sstevel@tonic-gate * call page_free, so we can collect 19800Sstevel@tonic-gate * the page here. 19810Sstevel@tonic-gate */ 19820Sstevel@tonic-gate collected++; 19830Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19840Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 19850Sstevel@tonic-gate start_pgrp; 19860Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19870Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19880Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19890Sstevel@tonic-gate page_delete_collect(pp, mhp); 19900Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 19910Sstevel@tonic-gate (1 << (bit % NBPBMW)); 19920Sstevel@tonic-gate continue; 19930Sstevel@tonic-gate } 19940Sstevel@tonic-gate /* 19950Sstevel@tonic-gate * The page is toxic and the mod bit is 19960Sstevel@tonic-gate * set, we cannot do anything here to deal 19970Sstevel@tonic-gate * with it. 
19980Sstevel@tonic-gate */ 1999917Selowe if (PP_TOXIC(pp)) { 20000Sstevel@tonic-gate page_unlock(pp); 20010Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20020Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20030Sstevel@tonic-gate start_pgrp; 20040Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20050Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20060Sstevel@tonic-gate MDSTAT_INCR(mhp, modtoxic); 20070Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20080Sstevel@tonic-gate continue; 20090Sstevel@tonic-gate } 20100Sstevel@tonic-gate MDSTAT_INCR(mhp, nputpage); 20110Sstevel@tonic-gate vp = pp->p_vnode; 20120Sstevel@tonic-gate offset = pp->p_offset; 20130Sstevel@tonic-gate VN_HOLD(vp); 20140Sstevel@tonic-gate page_unlock(pp); 20150Sstevel@tonic-gate (void) VOP_PUTPAGE(vp, offset, PAGESIZE, 2016*5331Samw B_INVAL|B_FORCE, kcred, NULL); 20170Sstevel@tonic-gate VN_RELE(vp); 20180Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20190Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20200Sstevel@tonic-gate start_pgrp; 20210Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20220Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20230Sstevel@tonic-gate /* 20240Sstevel@tonic-gate * Try to get the page back immediately 20250Sstevel@tonic-gate * so that it can be collected. 20260Sstevel@tonic-gate */ 20270Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 20280Sstevel@tonic-gate if (pp == NULL) { 20290Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim); 20300Sstevel@tonic-gate /* 20310Sstevel@tonic-gate * This should not happen as this 20320Sstevel@tonic-gate * thread is deleting the page. 20330Sstevel@tonic-gate * If this code is ever generalized, this 20340Sstevel@tonic-gate * could become a real possibility. 20350Sstevel@tonic-gate */ 20360Sstevel@tonic-gate #ifdef DEBUG 20370Sstevel@tonic-gate cmn_err(CE_WARN, 20380Sstevel@tonic-gate "delete_memory_thread(0x%p) " 20390Sstevel@tonic-gate "pfn 0x%lx has no page_t", 20400Sstevel@tonic-gate (void *)mhp, pfn); 20410Sstevel@tonic-gate #endif /* DEBUG */ 20420Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20430Sstevel@tonic-gate continue; 20440Sstevel@tonic-gate } 20450Sstevel@tonic-gate if (page_try_reclaim_lock(pp, SE_EXCL, 2046917Selowe SE_EXCL_WANTED | SE_RETIRED)) { 20470Sstevel@tonic-gate if (PP_ISFREE(pp)) { 20480Sstevel@tonic-gate goto free_page_collect; 20490Sstevel@tonic-gate } 20500Sstevel@tonic-gate page_unlock(pp); 20510Sstevel@tonic-gate } 20520Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim); 20530Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20540Sstevel@tonic-gate continue; 20550Sstevel@tonic-gate 20560Sstevel@tonic-gate reloc: 20570Sstevel@tonic-gate /* 20580Sstevel@tonic-gate * Got some freemem and a target 20590Sstevel@tonic-gate * page, so move the data to avoid 20600Sstevel@tonic-gate * I/O and lock problems. 20610Sstevel@tonic-gate */ 20620Sstevel@tonic-gate ASSERT(!page_iolock_assert(pp)); 20630Sstevel@tonic-gate MDSTAT_INCR(mhp, nreloc); 20640Sstevel@tonic-gate /* 20650Sstevel@tonic-gate * page_relocate() will return pgcnt: the 20660Sstevel@tonic-gate * number of consecutive pages relocated. 20670Sstevel@tonic-gate * If it is successful, pp will be a 20680Sstevel@tonic-gate * linked list of the page structs that 20690Sstevel@tonic-gate * were relocated. If page_relocate() is 20700Sstevel@tonic-gate * unsuccessful, pp will be unmodified.
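 * For a large page the constituent pages move together; e.g. a
 * successful relocation of a 4-page large page would yield
 * pgcnt == 4 (illustrative; the count depends on p_szc).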
20710Sstevel@tonic-gate */ 20720Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20730Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 20740Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20750Sstevel@tonic-gate result = page_relocate(&pp, &pp_targ, 0, 0, 20760Sstevel@tonic-gate &pgcnt, NULL); 20770Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20780Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20790Sstevel@tonic-gate start_pgrp; 20800Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20810Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20820Sstevel@tonic-gate if (result != 0) { 20830Sstevel@tonic-gate MDSTAT_INCR(mhp, nrelocfail); 20840Sstevel@tonic-gate /* 20850Sstevel@tonic-gate * We did not succeed. We need 20860Sstevel@tonic-gate * to give the pp_targ pages back. 20870Sstevel@tonic-gate * page_free(pp_targ, 1) without 20880Sstevel@tonic-gate * the freemem accounting. 20890Sstevel@tonic-gate */ 20900Sstevel@tonic-gate group_page_unlock(pp); 20910Sstevel@tonic-gate page_free_replacement_page(pp_targ); 20920Sstevel@tonic-gate page_unlock(pp); 20930Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20940Sstevel@tonic-gate continue; 20950Sstevel@tonic-gate } 20960Sstevel@tonic-gate 20970Sstevel@tonic-gate /* 20980Sstevel@tonic-gate * We will then collect pgcnt pages. 20990Sstevel@tonic-gate */ 21000Sstevel@tonic-gate ASSERT(pgcnt > 0); 21010Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21020Sstevel@tonic-gate /* 21030Sstevel@tonic-gate * We need to make sure freemem_left is 21040Sstevel@tonic-gate * large enough. 21050Sstevel@tonic-gate */ 21060Sstevel@tonic-gate while ((freemem_left < pgcnt) && 21070Sstevel@tonic-gate (!mhp->mh_cancel)) { 21080Sstevel@tonic-gate freemem_left += 21090Sstevel@tonic-gate delthr_get_freemem(mhp); 21100Sstevel@tonic-gate } 21110Sstevel@tonic-gate 21120Sstevel@tonic-gate /* 21130Sstevel@tonic-gate * Do not proceed if mh_cancel is set. 21140Sstevel@tonic-gate */ 21150Sstevel@tonic-gate if (mhp->mh_cancel) { 21160Sstevel@tonic-gate while (pp_targ != NULL) { 21170Sstevel@tonic-gate /* 21180Sstevel@tonic-gate * Unlink and unlock each page. 21190Sstevel@tonic-gate */ 21200Sstevel@tonic-gate tpp_targ = pp_targ; 21210Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ); 21220Sstevel@tonic-gate page_unlock(tpp_targ); 21230Sstevel@tonic-gate } 21240Sstevel@tonic-gate /* 21250Sstevel@tonic-gate * We need to give the pp pages back. 21260Sstevel@tonic-gate * page_free(pp, 1) without the 21270Sstevel@tonic-gate * freemem accounting. 21280Sstevel@tonic-gate */ 21290Sstevel@tonic-gate page_free_replacement_page(pp); 21300Sstevel@tonic-gate break; 21310Sstevel@tonic-gate } 21320Sstevel@tonic-gate 21330Sstevel@tonic-gate /* Now remove pgcnt from freemem_left */ 21340Sstevel@tonic-gate freemem_left -= pgcnt; 21350Sstevel@tonic-gate ASSERT(freemem_left >= 0); 21360Sstevel@tonic-gate szc = pp->p_szc; 21370Sstevel@tonic-gate while (pp != NULL) { 21380Sstevel@tonic-gate /* 21390Sstevel@tonic-gate * pp and pp_targ were passed back as 21400Sstevel@tonic-gate * a linked list of pages. 21410Sstevel@tonic-gate * Unlink and unlock each page. 21420Sstevel@tonic-gate */ 21430Sstevel@tonic-gate tpp_targ = pp_targ; 21440Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ); 21450Sstevel@tonic-gate page_unlock(tpp_targ); 21460Sstevel@tonic-gate /* 21470Sstevel@tonic-gate * The original page is now free 21480Sstevel@tonic-gate * so remove it from the linked 21490Sstevel@tonic-gate * list and collect it. 
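 * (page_relocate() left each source page exclusively locked, with no
 * vnode and no mappings, as the ASSERTs below verify.)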
21500Sstevel@tonic-gate */ 21510Sstevel@tonic-gate tpp = pp; 21520Sstevel@tonic-gate page_sub(&pp, tpp); 21530Sstevel@tonic-gate pfn = page_pptonum(tpp); 21540Sstevel@tonic-gate collected++; 21550Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp)); 21560Sstevel@tonic-gate ASSERT(tpp->p_vnode == NULL); 21570Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(tpp)); 21580Sstevel@tonic-gate ASSERT(tpp->p_szc == szc); 21590Sstevel@tonic-gate tpp->p_szc = 0; 21600Sstevel@tonic-gate page_delete_collect(tpp, mhp); 21610Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 21620Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 21630Sstevel@tonic-gate (1 << (bit % NBPBMW)); 21640Sstevel@tonic-gate } 21650Sstevel@tonic-gate ASSERT(pp_targ == NULL); 21660Sstevel@tonic-gate } 21670Sstevel@tonic-gate } 21680Sstevel@tonic-gate first_scan = 0; 21690Sstevel@tonic-gate if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) && 21700Sstevel@tonic-gate (collected == 0)) { 21710Sstevel@tonic-gate /* 21720Sstevel@tonic-gate * This code is needed as we cannot wait 21730Sstevel@tonic-gate * for a page to be locked OR the delete to 21740Sstevel@tonic-gate * be cancelled. Also, we must delay so 21750Sstevel@tonic-gate * that other threads get a chance to run 21760Sstevel@tonic-gate * on our cpu, otherwise page locks may be 21770Sstevel@tonic-gate * held indefinitely by those threads. 21780Sstevel@tonic-gate */ 21790Sstevel@tonic-gate MDSTAT_INCR(mhp, ndelay); 21800Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 21810Sstevel@tonic-gate (void) cv_timedwait(&mhp->mh_cv, &mhp->mh_mutex, 21820Sstevel@tonic-gate (lbolt + DEL_BUSY_WAIT_TICKS)); 21830Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex); 21840Sstevel@tonic-gate } 21850Sstevel@tonic-gate } 21860Sstevel@tonic-gate /* stop the dr aio cleanup thread */ 21870Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 1; 21880Sstevel@tonic-gate transit_list_collect(mhp, 0); 21890Sstevel@tonic-gate if (freemem_left != 0) { 21900Sstevel@tonic-gate /* Return any surplus. */ 21910Sstevel@tonic-gate page_create_putback(freemem_left); 21920Sstevel@tonic-gate freemem_left = 0; 21930Sstevel@tonic-gate } 21940Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 21950Sstevel@tonic-gate mdsp = mdsp->mds_next) { 21960Sstevel@tonic-gate mem_node_post_del_slice(mdsp->mds_base, 21970Sstevel@tonic-gate mdsp->mds_base + mdsp->mds_npgs - 1, 21980Sstevel@tonic-gate (mhp->mh_cancel != 0)); 21990Sstevel@tonic-gate } 22000Sstevel@tonic-gate #ifdef MEM_DEL_STATS 22010Sstevel@tonic-gate ntick_total = (uint64_t)ddi_get_lbolt() - start_total; 22020Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 22030Sstevel@tonic-gate MDSTAT_TOTAL(mhp, ntick_total); 22040Sstevel@tonic-gate MDSTAT_PRINT(mhp); 22050Sstevel@tonic-gate 22060Sstevel@tonic-gate /* 22070Sstevel@tonic-gate * If the memory delete was cancelled, exclusive-wanted bits must 2208917Selowe * be cleared. If there are retired pages being deleted, they need 2209917Selowe * to be unretired. 
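 * The scan below walks every span: pages that were never collected
 * (mds_bitmap bit clear) have their exclusive-wanted bit cleared on
 * cancel, and pages recorded in mds_bitmap_retired are unlocked on
 * cancel or unretired (PR_UNR_CLEAN) on success.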
22100Sstevel@tonic-gate */ 22110Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 22120Sstevel@tonic-gate mdsp = mdsp->mds_next) { 22130Sstevel@tonic-gate pfn_t pfn, p_end; 22140Sstevel@tonic-gate 22150Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 22160Sstevel@tonic-gate for (pfn = mdsp->mds_base; pfn < p_end; pfn++) { 22170Sstevel@tonic-gate page_t *pp; 22180Sstevel@tonic-gate pgcnt_t bit; 22190Sstevel@tonic-gate 22200Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 22210Sstevel@tonic-gate if (mhp->mh_cancel) { 22220Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 22230Sstevel@tonic-gate if (pp != NULL) { 22240Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] & 22250Sstevel@tonic-gate (1 << (bit % NBPBMW))) == 0) { 22260Sstevel@tonic-gate page_lock_clr_exclwanted(pp); 22270Sstevel@tonic-gate } 22280Sstevel@tonic-gate } 22290Sstevel@tonic-gate } else { 22300Sstevel@tonic-gate pp = NULL; 22310Sstevel@tonic-gate } 22320Sstevel@tonic-gate if ((mdsp->mds_bitmap_retired[bit / NBPBMW] & 22330Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) { 22340Sstevel@tonic-gate /* do we already have pp? */ 22350Sstevel@tonic-gate if (pp == NULL) { 22360Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 22370Sstevel@tonic-gate } 22380Sstevel@tonic-gate ASSERT(pp != NULL); 2239917Selowe ASSERT(PP_RETIRED(pp)); 22400Sstevel@tonic-gate if (mhp->mh_cancel != 0) { 2241917Selowe page_unlock(pp); 22420Sstevel@tonic-gate /* 22430Sstevel@tonic-gate * To satisfy ASSERT below in 22440Sstevel@tonic-gate * cancel code. 22450Sstevel@tonic-gate */ 22460Sstevel@tonic-gate mhp->mh_hold_todo++; 22470Sstevel@tonic-gate } else { 22483253Smec (void) page_unretire_pp(pp, 22493253Smec PR_UNR_CLEAN); 22500Sstevel@tonic-gate } 22510Sstevel@tonic-gate } 22520Sstevel@tonic-gate } 22530Sstevel@tonic-gate } 22540Sstevel@tonic-gate /* 22550Sstevel@tonic-gate * Free retired page bitmap and collected page bitmap 22560Sstevel@tonic-gate */ 22570Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 22580Sstevel@tonic-gate mdsp = mdsp->mds_next) { 22590Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap_retired != NULL); 22600Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp)); 22610Sstevel@tonic-gate mdsp->mds_bitmap_retired = NULL; /* Paranoia. */ 22620Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap != NULL); 22630Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp)); 22640Sstevel@tonic-gate mdsp->mds_bitmap = NULL; /* Paranoia. */ 22650Sstevel@tonic-gate } 22660Sstevel@tonic-gate 22670Sstevel@tonic-gate /* wait for our dr aio cancel thread to exit */ 22680Sstevel@tonic-gate while (!(mhp->mh_aio_cleanup_done)) { 22690Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 22700Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY)); 22710Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex); 22720Sstevel@tonic-gate } 22730Sstevel@tonic-gate refused: 22740Sstevel@tonic-gate if (mhp->mh_cancel != 0) { 22750Sstevel@tonic-gate page_t *pp; 22760Sstevel@tonic-gate 22770Sstevel@tonic-gate comp_code = mhp->mh_cancel; 22780Sstevel@tonic-gate /* 22790Sstevel@tonic-gate * Go through list of deleted pages (mh_deleted) freeing 22800Sstevel@tonic-gate * them. 22810Sstevel@tonic-gate */ 22820Sstevel@tonic-gate while ((pp = mhp->mh_deleted) != NULL) { 22830Sstevel@tonic-gate mhp->mh_deleted = pp->p_next; 22840Sstevel@tonic-gate mhp->mh_hold_todo++; 22850Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 22860Sstevel@tonic-gate /* Restore p_next. 
*/ 22870Sstevel@tonic-gate pp->p_next = pp->p_prev; 22880Sstevel@tonic-gate if (PP_ISFREE(pp)) { 22890Sstevel@tonic-gate cmn_err(CE_PANIC, 22900Sstevel@tonic-gate "page %p is free", 22910Sstevel@tonic-gate (void *)pp); 22920Sstevel@tonic-gate } 22930Sstevel@tonic-gate page_free(pp, 1); 22940Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 22950Sstevel@tonic-gate } 22960Sstevel@tonic-gate ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages); 22970Sstevel@tonic-gate 22980Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 22990Sstevel@tonic-gate put_availrmem(mhp->mh_vm_pages); 23000Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 23010Sstevel@tonic-gate 23020Sstevel@tonic-gate goto t_exit; 23030Sstevel@tonic-gate } 23040Sstevel@tonic-gate 23050Sstevel@tonic-gate /* 23060Sstevel@tonic-gate * All the pages are no longer in use and are exclusively locked. 23070Sstevel@tonic-gate */ 23080Sstevel@tonic-gate 23090Sstevel@tonic-gate mhp->mh_deleted = NULL; 23100Sstevel@tonic-gate 23110Sstevel@tonic-gate kphysm_del_cleanup(mhp); 23120Sstevel@tonic-gate 23130Sstevel@tonic-gate comp_code = KPHYSM_OK; 23140Sstevel@tonic-gate 23150Sstevel@tonic-gate t_exit: 23160Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23170Sstevel@tonic-gate kphysm_setup_post_del(mhp->mh_vm_pages, 23180Sstevel@tonic-gate (comp_code == KPHYSM_OK) ? 0 : 1); 23190Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 23200Sstevel@tonic-gate 23210Sstevel@tonic-gate early_exit: 23220Sstevel@tonic-gate /* mhp->mh_mutex exited by CALLB_CPR_EXIT() */ 23230Sstevel@tonic-gate mhp->mh_state = MHND_DONE; 23240Sstevel@tonic-gate del_complete_funcp = mhp->mh_delete_complete; 23250Sstevel@tonic-gate del_complete_arg = mhp->mh_delete_complete_arg; 23260Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 23270Sstevel@tonic-gate (*del_complete_funcp)(del_complete_arg, comp_code); 23280Sstevel@tonic-gate thread_exit(); 23290Sstevel@tonic-gate /*NOTREACHED*/ 23300Sstevel@tonic-gate } 23310Sstevel@tonic-gate 23320Sstevel@tonic-gate /* 23330Sstevel@tonic-gate * Start the delete of the memory from the system. 
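 *
 * Illustrative call sequence (hypothetical caller; error handling
 * elided, and my_complete/my_arg are made-up names):
 *
 *	memhandle_t h;
 *
 *	(void) kphysm_del_gethandle(&h);
 *	(void) kphysm_del_span(h, base, npgs);
 *	if (kphysm_del_start(h, my_complete, my_arg) != KPHYSM_OK)
 *		(void) kphysm_del_release(h);
 *
 * my_complete() is later invoked by delete_memory_thread() with the
 * completion code once the delete finishes or is cancelled.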
23340Sstevel@tonic-gate */ 23350Sstevel@tonic-gate int 23360Sstevel@tonic-gate kphysm_del_start( 23370Sstevel@tonic-gate memhandle_t handle, 23380Sstevel@tonic-gate void (*complete)(void *, int), 23390Sstevel@tonic-gate void *complete_arg) 23400Sstevel@tonic-gate { 23410Sstevel@tonic-gate struct mem_handle *mhp; 23420Sstevel@tonic-gate 23430Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 23440Sstevel@tonic-gate if (mhp == NULL) { 23450Sstevel@tonic-gate return (KPHYSM_EHANDLE); 23460Sstevel@tonic-gate } 23470Sstevel@tonic-gate switch (mhp->mh_state) { 23480Sstevel@tonic-gate case MHND_FREE: 23490Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 23500Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23510Sstevel@tonic-gate return (KPHYSM_EHANDLE); 23520Sstevel@tonic-gate case MHND_INIT: 23530Sstevel@tonic-gate break; 23540Sstevel@tonic-gate case MHND_STARTING: 23550Sstevel@tonic-gate case MHND_RUNNING: 23560Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23570Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 23580Sstevel@tonic-gate case MHND_DONE: 23590Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23600Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 23610Sstevel@tonic-gate case MHND_RELEASE: 23620Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23630Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 23640Sstevel@tonic-gate default: 23650Sstevel@tonic-gate #ifdef DEBUG 23660Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d", 23670Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 23680Sstevel@tonic-gate #endif /* DEBUG */ 23690Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23700Sstevel@tonic-gate return (KPHYSM_EHANDLE); 23710Sstevel@tonic-gate } 23720Sstevel@tonic-gate 23730Sstevel@tonic-gate if (mhp->mh_transit.trl_spans == NULL) { 23740Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23750Sstevel@tonic-gate return (KPHYSM_ENOWORK); 23760Sstevel@tonic-gate } 23770Sstevel@tonic-gate 23780Sstevel@tonic-gate ASSERT(complete != NULL); 23790Sstevel@tonic-gate mhp->mh_delete_complete = complete; 23800Sstevel@tonic-gate mhp->mh_delete_complete_arg = complete_arg; 23810Sstevel@tonic-gate mhp->mh_state = MHND_STARTING; 23820Sstevel@tonic-gate /* 23830Sstevel@tonic-gate * Release the mutex in case thread_create sleeps. 23840Sstevel@tonic-gate */ 23850Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23860Sstevel@tonic-gate 23870Sstevel@tonic-gate /* 23880Sstevel@tonic-gate * The "obvious" process for this thread is pageout (proc_pageout) 23890Sstevel@tonic-gate * but this gives the thread too much power over freemem 23900Sstevel@tonic-gate * which results in freemem starvation. 23910Sstevel@tonic-gate */ 23920Sstevel@tonic-gate (void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0, 23930Sstevel@tonic-gate TS_RUN, maxclsyspri - 1); 23940Sstevel@tonic-gate 23950Sstevel@tonic-gate return (KPHYSM_OK); 23960Sstevel@tonic-gate } 23970Sstevel@tonic-gate 23980Sstevel@tonic-gate static kmutex_t pp_dummy_lock; /* Protects init. of pp_dummy. */ 23990Sstevel@tonic-gate static caddr_t pp_dummy; 24000Sstevel@tonic-gate static pgcnt_t pp_dummy_npages; 24010Sstevel@tonic-gate static pfn_t *pp_dummy_pfn; /* Array of dummy pfns. 
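 * (one pfn per page of the pp_dummy area,
 * filled in by memseg_remap_init() below)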
*/ 24020Sstevel@tonic-gate 24030Sstevel@tonic-gate static void 24040Sstevel@tonic-gate memseg_remap_init_pages(page_t *pages, page_t *epages) 24050Sstevel@tonic-gate { 24060Sstevel@tonic-gate page_t *pp; 24070Sstevel@tonic-gate 24080Sstevel@tonic-gate for (pp = pages; pp < epages; pp++) { 24090Sstevel@tonic-gate pp->p_pagenum = PFN_INVALID; /* XXXX */ 24100Sstevel@tonic-gate pp->p_offset = (u_offset_t)-1; 24110Sstevel@tonic-gate page_iolock_init(pp); 24120Sstevel@tonic-gate while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM)) 24130Sstevel@tonic-gate continue; 24140Sstevel@tonic-gate page_lock_delete(pp); 24150Sstevel@tonic-gate } 24160Sstevel@tonic-gate } 24170Sstevel@tonic-gate 24180Sstevel@tonic-gate void 24190Sstevel@tonic-gate memseg_remap_init() 24200Sstevel@tonic-gate { 24210Sstevel@tonic-gate mutex_enter(&pp_dummy_lock); 24220Sstevel@tonic-gate if (pp_dummy == NULL) { 24230Sstevel@tonic-gate uint_t dpages; 24240Sstevel@tonic-gate int i; 24250Sstevel@tonic-gate 24260Sstevel@tonic-gate /* 24270Sstevel@tonic-gate * dpages starts off as the size of the structure and 24280Sstevel@tonic-gate * ends up as the minimum number of pages that will 24290Sstevel@tonic-gate * hold a whole number of page_t structures. 24300Sstevel@tonic-gate */ 24310Sstevel@tonic-gate dpages = sizeof (page_t); 24320Sstevel@tonic-gate ASSERT(dpages != 0); 24330Sstevel@tonic-gate ASSERT(dpages <= MMU_PAGESIZE); 24340Sstevel@tonic-gate 24350Sstevel@tonic-gate while ((dpages & 1) == 0) 24360Sstevel@tonic-gate dpages >>= 1; 24370Sstevel@tonic-gate 24380Sstevel@tonic-gate pp_dummy_npages = dpages; 24390Sstevel@tonic-gate /* 24400Sstevel@tonic-gate * Allocate pp_dummy pages directly from static_arena, 24410Sstevel@tonic-gate * since these are whole page allocations and are 24420Sstevel@tonic-gate * referenced by physical address. This also has the 24430Sstevel@tonic-gate * nice fringe benefit of hiding the memory from 24440Sstevel@tonic-gate * ::findleaks since it doesn't deal well with allocated 24450Sstevel@tonic-gate * kernel heap memory that doesn't have any mappings. 24460Sstevel@tonic-gate */ 24470Sstevel@tonic-gate pp_dummy = vmem_xalloc(static_arena, ptob(pp_dummy_npages), 24480Sstevel@tonic-gate PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP); 24490Sstevel@tonic-gate bzero(pp_dummy, ptob(pp_dummy_npages)); 24500Sstevel@tonic-gate ASSERT(((uintptr_t)pp_dummy & MMU_PAGEOFFSET) == 0); 24510Sstevel@tonic-gate pp_dummy_pfn = kmem_alloc(sizeof (*pp_dummy_pfn) * 24520Sstevel@tonic-gate pp_dummy_npages, KM_SLEEP); 24530Sstevel@tonic-gate for (i = 0; i < pp_dummy_npages; i++) { 24540Sstevel@tonic-gate pp_dummy_pfn[i] = hat_getpfnum(kas.a_hat, 24550Sstevel@tonic-gate &pp_dummy[MMU_PAGESIZE * i]); 24560Sstevel@tonic-gate ASSERT(pp_dummy_pfn[i] != PFN_INVALID); 24570Sstevel@tonic-gate } 24580Sstevel@tonic-gate /* 24590Sstevel@tonic-gate * Initialize the page_t's to a known 'deleted' state 24600Sstevel@tonic-gate * that matches the state of deleted pages. 24610Sstevel@tonic-gate */ 24620Sstevel@tonic-gate memseg_remap_init_pages((page_t *)pp_dummy, 24630Sstevel@tonic-gate (page_t *)(pp_dummy + 24640Sstevel@tonic-gate ptob(pp_dummy_npages))); 24650Sstevel@tonic-gate /* Remove kmem mappings for the pages for safety. */ 24660Sstevel@tonic-gate hat_unload(kas.a_hat, pp_dummy, ptob(pp_dummy_npages), 24670Sstevel@tonic-gate HAT_UNLOAD_UNLOCK); 24680Sstevel@tonic-gate /* Leave pp_dummy pointer set as flag that init is done. 
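 *
 * Worked example of the dpages computation above (illustrative only,
 * assuming PAGESIZE == 4096): were sizeof (page_t) 120 == 2^3 * 15,
 * dpages would shrink to 15, the smallest page count for which
 * ptob(15) == 61440 is an exact multiple of sizeof (page_t), i.e.
 * 15 pages hold exactly 512 whole page_t structures.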
*/ 24690Sstevel@tonic-gate } 24700Sstevel@tonic-gate mutex_exit(&pp_dummy_lock); 24710Sstevel@tonic-gate } 24720Sstevel@tonic-gate 24730Sstevel@tonic-gate static void 24740Sstevel@tonic-gate memseg_remap_to_dummy(caddr_t pp, pgcnt_t metapgs) 24750Sstevel@tonic-gate { 24760Sstevel@tonic-gate ASSERT(pp_dummy != NULL); 24770Sstevel@tonic-gate 24780Sstevel@tonic-gate while (metapgs != 0) { 24790Sstevel@tonic-gate pgcnt_t n; 24800Sstevel@tonic-gate int i; 24810Sstevel@tonic-gate 24820Sstevel@tonic-gate n = pp_dummy_npages; 24830Sstevel@tonic-gate if (n > metapgs) 24840Sstevel@tonic-gate n = metapgs; 24850Sstevel@tonic-gate for (i = 0; i < n; i++) { 24860Sstevel@tonic-gate hat_devload(kas.a_hat, pp, ptob(1), pp_dummy_pfn[i], 24870Sstevel@tonic-gate PROT_READ, 24880Sstevel@tonic-gate HAT_LOAD | HAT_LOAD_NOCONSIST | 24890Sstevel@tonic-gate HAT_LOAD_REMAP); 24900Sstevel@tonic-gate pp += ptob(1); 24910Sstevel@tonic-gate } 24920Sstevel@tonic-gate metapgs -= n; 24930Sstevel@tonic-gate } 24940Sstevel@tonic-gate } 24950Sstevel@tonic-gate 24960Sstevel@tonic-gate /* 24970Sstevel@tonic-gate * Transition all the deleted pages to the deleted state so that 24980Sstevel@tonic-gate * page_lock will not wait. The page_lock_delete call will 24990Sstevel@tonic-gate * also wake up any waiters. 25000Sstevel@tonic-gate */ 25010Sstevel@tonic-gate static void 25020Sstevel@tonic-gate memseg_lock_delete_all(struct memseg *seg) 25030Sstevel@tonic-gate { 25040Sstevel@tonic-gate page_t *pp; 25050Sstevel@tonic-gate 25060Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) { 25070Sstevel@tonic-gate pp->p_pagenum = PFN_INVALID; /* XXXX */ 25080Sstevel@tonic-gate page_lock_delete(pp); 25090Sstevel@tonic-gate } 25100Sstevel@tonic-gate } 25110Sstevel@tonic-gate 25120Sstevel@tonic-gate static void 25130Sstevel@tonic-gate kphysm_del_cleanup(struct mem_handle *mhp) 25140Sstevel@tonic-gate { 25150Sstevel@tonic-gate struct memdelspan *mdsp; 25160Sstevel@tonic-gate struct memseg *seg; 25170Sstevel@tonic-gate struct memseg **segpp; 25180Sstevel@tonic-gate struct memseg *seglist; 25190Sstevel@tonic-gate pfn_t p_end; 25200Sstevel@tonic-gate uint64_t avmem; 25210Sstevel@tonic-gate pgcnt_t avpgs; 25220Sstevel@tonic-gate pgcnt_t npgs; 25230Sstevel@tonic-gate 25240Sstevel@tonic-gate avpgs = mhp->mh_vm_pages; 25250Sstevel@tonic-gate 25260Sstevel@tonic-gate memsegs_lock(1); 25270Sstevel@tonic-gate 25280Sstevel@tonic-gate /* 25290Sstevel@tonic-gate * remove from main segment list. 25300Sstevel@tonic-gate */ 25310Sstevel@tonic-gate npgs = 0; 25320Sstevel@tonic-gate seglist = NULL; 25330Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 25340Sstevel@tonic-gate mdsp = mdsp->mds_next) { 25350Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 25360Sstevel@tonic-gate for (segpp = &memsegs; (seg = *segpp) != NULL; ) { 25370Sstevel@tonic-gate if (seg->pages_base >= p_end || 25380Sstevel@tonic-gate seg->pages_end <= mdsp->mds_base) { 25390Sstevel@tonic-gate /* Span and memseg don't overlap. */ 25400Sstevel@tonic-gate segpp = &((*segpp)->next); 25410Sstevel@tonic-gate continue; 25420Sstevel@tonic-gate } 25430Sstevel@tonic-gate ASSERT(seg->pages_base >= mdsp->mds_base); 25440Sstevel@tonic-gate ASSERT(seg->pages_end <= p_end); 25450Sstevel@tonic-gate 25461373Skchow PLCNT_MODIFY_MAX(seg->pages_base, 25471373Skchow seg->pages_base - seg->pages_end); 25481373Skchow 25490Sstevel@tonic-gate /* Hide the memseg from future scans. 
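 * The kpm layer is updated first (hat_kpm_delmem_mseg_update()),
 * then the memseg is unlinked from the memsegs list.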
/*
 * Transition all the deleted pages to the deleted state so that
 * page_lock will not wait.  The page_lock_delete call will
 * also wake up any waiters.
 */
static void
memseg_lock_delete_all(struct memseg *seg)
{
	page_t *pp;

	for (pp = seg->pages; pp < seg->epages; pp++) {
		pp->p_pagenum = PFN_INVALID;	/* XXXX */
		page_lock_delete(pp);
	}
}

static void
kphysm_del_cleanup(struct mem_handle *mhp)
{
	struct memdelspan *mdsp;
	struct memseg *seg;
	struct memseg **segpp;
	struct memseg *seglist;
	pfn_t p_end;
	uint64_t avmem;
	pgcnt_t avpgs;
	pgcnt_t npgs;

	avpgs = mhp->mh_vm_pages;

	memsegs_lock(1);

	/*
	 * Remove the affected memsegs from the main segment list.
	 */
	npgs = 0;
	seglist = NULL;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		for (segpp = &memsegs; (seg = *segpp) != NULL; ) {
			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				segpp = &((*segpp)->next);
				continue;
			}
			ASSERT(seg->pages_base >= mdsp->mds_base);
			ASSERT(seg->pages_end <= p_end);

			PLCNT_MODIFY_MAX(seg->pages_base,
			    seg->pages_base - seg->pages_end);

			/* Hide the memseg from future scans. */
			hat_kpm_delmem_mseg_update(seg, segpp);
			*segpp = seg->next;
			membar_producer();	/* TODO: Needed? */
			npgs += MSEG_NPAGES(seg);

			/*
			 * Leave the deleted segment's next pointer intact
			 * in case a memsegs scanning loop is walking this
			 * segment concurrently.
			 */
			seg->lnext = seglist;
			seglist = seg;
		}
	}

	build_pfn_hash();

	ASSERT(npgs < total_pages);
	total_pages -= npgs;

	/*
	 * Recalculate the paging parameters now that total_pages has
	 * changed.  This will also cause the clock hands to be reset
	 * before the next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	mutex_exit(&mhp->mh_mutex);

	while ((seg = seglist) != NULL) {
		pfn_t mseg_start;
		pfn_t mseg_base, mseg_end;
		pgcnt_t mseg_npgs;
		page_t *pp;
		pgcnt_t metapgs;
		int dynamic;
		int mlret;

		seglist = seg->lnext;

		/*
		 * Put the page_t's into the deleted state to stop
		 * cv_wait()s on the pages.  When we remap, the dummy
		 * page_t's will be in the same state.
		 */
		memseg_lock_delete_all(seg);
		/*
		 * Collect information from pages_base and pages_end
		 * now, so that we can flag the memseg as deleted early
		 * by setting pages_end == pages_base.
		 */
		mseg_base = seg->pages_base;
		mseg_end = seg->pages_end;
		mseg_npgs = MSEG_NPAGES(seg);
		dynamic = memseg_is_dynamic(seg, &mseg_start);

		seg->pages_end = seg->pages_base;

		if (dynamic) {
			pp = seg->pages;
			metapgs = mseg_base - mseg_start;
			ASSERT(metapgs != 0);

			/* Remap the metadata to our special dummy area. */
			memseg_remap_to_dummy((caddr_t)pp, metapgs);

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_va_avail;
			memseg_va_avail = seg;
			mutex_exit(&memseg_lists_lock);
		} else {
			/*
			 * Set mseg_start for the clean-up below.
			 */
			mseg_start = seg->pages_base;
			/*
			 * For memory whose page_ts were allocated
			 * at boot, we need to find a new use for
			 * the page_t memory.
			 * For the moment, just leak it.
			 * (It is held on the memseg_delete_junk list.)
			 */

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_delete_junk;
			memseg_delete_junk = seg;
			mutex_exit(&memseg_lists_lock);
		}

		/* Must not use seg now as it could be re-used. */

		memlist_write_lock();

		mlret = memlist_delete_span(
		    (uint64_t)(mseg_base) << PAGESHIFT,
		    (uint64_t)(mseg_npgs) << PAGESHIFT,
		    &phys_avail);
		ASSERT(mlret == MEML_SPANOP_OK);

		mlret = memlist_delete_span(
		    (uint64_t)(mseg_start) << PAGESHIFT,
		    (uint64_t)(mseg_end - mseg_start) << PAGESHIFT,
		    &phys_install);
		ASSERT(mlret == MEML_SPANOP_OK);
		phys_install_has_changed();

		memlist_write_unlock();
	}

	memlist_read_lock();
	installed_top_size(phys_install, &physmax, &physinstalled);
	memlist_read_unlock();

	mutex_enter(&freemem_lock);
	maxmem -= avpgs;
	physmem -= avpgs;
	/* availrmem is adjusted during the delete. */
	availrmem_initial -= avpgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_delete: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update the lgroup generation number on single-lgroup systems.
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/* Successfully deleted system memory. */
	mutex_enter(&mhp->mh_mutex);
}
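
/*
 * Editorial aside: the unlink in kphysm_del_cleanup() above relies on
 * a pattern that tolerates lock-free readers: the dead node's own next
 * pointer is left intact so a walker standing on it can step off
 * safely, and the unlinking store is followed by a producer barrier.
 * A hypothetical sketch (__atomic_thread_fence stands in for
 * membar_producer()), guarded out of the build:
 */
#if 0
struct node {
	struct node *next;
};

static void
unlink_for_readers(struct node **prevp, struct node *dead)
{
	*prevp = dead->next;	/* dead->next deliberately untouched */
	__atomic_thread_fence(__ATOMIC_RELEASE);
}
#endif
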
static uint_t mdel_nullvp_waiter;

static void
page_delete_collect(
	page_t *pp,
	struct mem_handle *mhp)
{
	if (pp->p_vnode) {
		page_hashout(pp, (kmutex_t *)NULL);
		/* do not do PP_SETAGED(pp); */
	} else {
		kmutex_t *sep;

		sep = page_se_mutex(pp);
		mutex_enter(sep);
		if (CV_HAS_WAITERS(&pp->p_cv)) {
			mdel_nullvp_waiter++;
			cv_broadcast(&pp->p_cv);
		}
		mutex_exit(sep);
	}
	ASSERT(pp->p_next == pp->p_prev);
	ASSERT(pp->p_next == NULL || pp->p_next == pp);
	pp->p_next = mhp->mh_deleted;
	mhp->mh_deleted = pp;
	ASSERT(mhp->mh_hold_todo != 0);
	mhp->mh_hold_todo--;
}

static void
transit_list_collect(struct mem_handle *mhp, int v)
{
	struct transit_list_head *trh;

	trh = &transit_list_head;
	mutex_enter(&trh->trh_lock);
	mhp->mh_transit.trl_collect = v;
	mutex_exit(&trh->trh_lock);
}

static void
transit_list_insert(struct transit_list *tlp)
{
	struct transit_list_head *trh;

	trh = &transit_list_head;
	ASSERT(MUTEX_HELD(&trh->trh_lock));
	tlp->trl_next = trh->trh_head;
	trh->trh_head = tlp;
}
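
/*
 * Editorial aside: transit_list_insert() above is plain head
 * insertion; it asserts that the caller already holds trh_lock.  A
 * hypothetical userland analogue that takes the lock itself, guarded
 * out of the build:
 */
#if 0
#include <pthread.h>

struct item {
	struct item *next;
};

static struct item *demo_head;
static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;

static void
demo_insert(struct item *ip)
{
	(void) pthread_mutex_lock(&demo_lock);
	ip->next = demo_head;
	demo_head = ip;
	(void) pthread_mutex_unlock(&demo_lock);
}
#endif
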
static void
transit_list_remove(struct transit_list *tlp)
{
	struct transit_list_head *trh;
	struct transit_list **tlpp;

	trh = &transit_list_head;
	tlpp = &trh->trh_head;
	ASSERT(MUTEX_HELD(&trh->trh_lock));
	while (*tlpp != NULL && *tlpp != tlp)
		tlpp = &(*tlpp)->trl_next;
	ASSERT(*tlpp != NULL);
	if (*tlpp == tlp)
		*tlpp = tlp->trl_next;
	tlp->trl_next = NULL;
}

static struct transit_list *
pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum)
{
	struct transit_list *tlp;

	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			if (pfnum >= mdsp->mds_base &&
			    pfnum < (mdsp->mds_base + mdsp->mds_npgs)) {
				return (tlp);
			}
		}
	}
	return (NULL);
}

int
pfn_is_being_deleted(pfn_t pfnum)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;
	if (trh->trh_head == NULL)
		return (0);

	mutex_enter(&trh->trh_lock);
	tlp = pfnum_to_transit_list(trh, pfnum);
	ret = (tlp != NULL && tlp->trl_collect);
	mutex_exit(&trh->trh_lock);

	return (ret);
}
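
/*
 * Editorial aside: transit_list_remove() above walks a pointer to
 * pointer, so unlinking the head needs no special case.  A
 * hypothetical sketch of the idiom, guarded out of the build:
 */
#if 0
struct item {
	struct item *next;
};

static void
demo_remove(struct item **headp, struct item *ip)
{
	struct item **ipp;

	for (ipp = headp; *ipp != NULL && *ipp != ip; )
		ipp = &(*ipp)->next;
	if (*ipp == ip)
		*ipp = ip->next;
	ip->next = NULL;
}
#endif
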
" (cancelled)" : "")); 28120Sstevel@tonic-gate printf("\t%8u nloop\n", mhp->mh_delstat.nloop); 28130Sstevel@tonic-gate printf("\t%8u need_free\n", mhp->mh_delstat.need_free); 28140Sstevel@tonic-gate printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop); 28150Sstevel@tonic-gate printf("\t%8u free_low\n", mhp->mh_delstat.free_low); 28160Sstevel@tonic-gate printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed); 28170Sstevel@tonic-gate printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck); 28180Sstevel@tonic-gate printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget); 28190Sstevel@tonic-gate printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail); 28200Sstevel@tonic-gate printf("\t%8u nfree\n", mhp->mh_delstat.nfree); 28210Sstevel@tonic-gate printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc); 28220Sstevel@tonic-gate printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail); 28230Sstevel@tonic-gate printf("\t%8u already_done\n", mhp->mh_delstat.already_done); 28240Sstevel@tonic-gate printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree); 28250Sstevel@tonic-gate printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked); 28260Sstevel@tonic-gate printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc); 28270Sstevel@tonic-gate printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl); 28280Sstevel@tonic-gate printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc); 28290Sstevel@tonic-gate printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy); 28300Sstevel@tonic-gate printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage); 28310Sstevel@tonic-gate printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim); 28320Sstevel@tonic-gate printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay); 28330Sstevel@tonic-gate printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail); 28340Sstevel@tonic-gate printf("\t%8u retired\n", mhp->mh_delstat.retired); 28350Sstevel@tonic-gate printf("\t%8u toxic\n", mhp->mh_delstat.toxic); 28360Sstevel@tonic-gate printf("\t%8u failing\n", mhp->mh_delstat.failing); 28370Sstevel@tonic-gate printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic); 28380Sstevel@tonic-gate printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic); 28390Sstevel@tonic-gate printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail); 28400Sstevel@tonic-gate printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail); 28410Sstevel@tonic-gate tmp = mhp->mh_delstat.nticks_total / hz; /* seconds */ 28420Sstevel@tonic-gate printf( 28430Sstevel@tonic-gate "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n", 28440Sstevel@tonic-gate mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60); 28450Sstevel@tonic-gate 28460Sstevel@tonic-gate tmp = mhp->mh_delstat.nticks_pgrp / hz; /* seconds */ 28470Sstevel@tonic-gate printf( 28480Sstevel@tonic-gate "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n", 28490Sstevel@tonic-gate mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60); 28500Sstevel@tonic-gate } 28510Sstevel@tonic-gate } 28520Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 28530Sstevel@tonic-gate 28540Sstevel@tonic-gate struct mem_callback { 28550Sstevel@tonic-gate kphysm_setup_vector_t *vec; 28560Sstevel@tonic-gate void *arg; 28570Sstevel@tonic-gate }; 28580Sstevel@tonic-gate 28590Sstevel@tonic-gate #define NMEMCALLBACKS 100 28600Sstevel@tonic-gate 28610Sstevel@tonic-gate static struct mem_callback mem_callbacks[NMEMCALLBACKS]; 28620Sstevel@tonic-gate static uint_t nmemcallbacks; 28630Sstevel@tonic-gate static krwlock_t mem_callback_rwlock; 28640Sstevel@tonic-gate 28650Sstevel@tonic-gate int 28660Sstevel@tonic-gate 
int
kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
{
	uint_t i, found;

	/*
	 * This test will become more complicated when the version must
	 * change.
	 */
	if (vec->version != KPHYSM_SETUP_VECTOR_VERSION)
		return (EINVAL);

	if (vec->post_add == NULL || vec->pre_del == NULL ||
	    vec->post_del == NULL)
		return (EINVAL);

	rw_enter(&mem_callback_rwlock, RW_WRITER);
	for (i = 0, found = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec == NULL && found == 0)
			found = i + 1;
		if (mem_callbacks[i].vec == vec &&
		    mem_callbacks[i].arg == arg) {
#ifdef DEBUG
			/* Catch this in DEBUG kernels. */
			cmn_err(CE_WARN, "kphysm_setup_func_register"
			    "(0x%p, 0x%p) duplicate registration from 0x%p",
			    (void *)vec, arg, (void *)caller());
#endif /* DEBUG */
			rw_exit(&mem_callback_rwlock);
			return (EEXIST);
		}
	}
	if (found != 0) {
		i = found - 1;
	} else {
		ASSERT(nmemcallbacks < NMEMCALLBACKS);
		if (nmemcallbacks == NMEMCALLBACKS) {
			rw_exit(&mem_callback_rwlock);
			return (ENOMEM);
		}
		i = nmemcallbacks++;
	}
	mem_callbacks[i].vec = vec;
	mem_callbacks[i].arg = arg;
	rw_exit(&mem_callback_rwlock);
	return (0);
}

void
kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
{
	uint_t i;

	rw_enter(&mem_callback_rwlock, RW_WRITER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec == vec &&
		    mem_callbacks[i].arg == arg) {
			mem_callbacks[i].vec = NULL;
			mem_callbacks[i].arg = NULL;
			if (i == (nmemcallbacks - 1))
				nmemcallbacks--;
			break;
		}
	}
	rw_exit(&mem_callback_rwlock);
}
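
/*
 * Editorial aside: the register routine above remembers the first free
 * slot as 'found = i + 1', so that zero can mean "none seen" even
 * though slot 0 is a valid index.  A hypothetical sketch of that
 * idiom, guarded out of the build:
 */
#if 0
#include <stddef.h>

static int
demo_find_free(void *slots[], unsigned nslots)
{
	unsigned i, found = 0;

	for (i = 0; i < nslots; i++) {
		if (slots[i] == NULL && found == 0)
			found = i + 1;
	}
	return (found == 0 ? -1 : (int)(found - 1));
}
#endif
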
static void
kphysm_setup_post_add(pgcnt_t delta_pages)
{
	uint_t i;

	rw_enter(&mem_callback_rwlock, RW_READER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			(*mem_callbacks[i].vec->post_add)
			    (mem_callbacks[i].arg, delta_pages);
		}
	}
	rw_exit(&mem_callback_rwlock);
}

/*
 * Note the locking between pre_del and post_del: the reader lock is
 * held between the two calls to stop the set of functions from
 * changing.
 */

static int
kphysm_setup_pre_del(pgcnt_t delta_pages)
{
	uint_t i;
	int ret;
	int aret;

	ret = 0;
	rw_enter(&mem_callback_rwlock, RW_READER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			aret = (*mem_callbacks[i].vec->pre_del)
			    (mem_callbacks[i].arg, delta_pages);
			ret |= aret;
		}
	}

	return (ret);
}

static void
kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled)
{
	uint_t i;

	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			(*mem_callbacks[i].vec->post_del)
			    (mem_callbacks[i].arg, delta_pages, cancelled);
		}
	}
	rw_exit(&mem_callback_rwlock);
}
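
/*
 * Editorial aside: the pre_del/post_del pair above splits one critical
 * section across two functions; the read lock taken in pre_del is only
 * dropped by post_del, freezing the callback set for the duration of
 * the delete.  A hypothetical userland analogue, guarded out of the
 * build:
 */
#if 0
#include <pthread.h>

static pthread_rwlock_t demo_rwlock = PTHREAD_RWLOCK_INITIALIZER;

static void
demo_pre(void)
{
	(void) pthread_rwlock_rdlock(&demo_rwlock);
	/* ... run every pre-delete callback ... */
}

static void
demo_post(void)
{
	/* ... run every post-delete callback ... */
	(void) pthread_rwlock_unlock(&demo_rwlock);
}
#endif
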
static int
kphysm_split_memseg(
	pfn_t base,
	pgcnt_t npgs)
{
	struct memseg *seg;
	struct memseg **segpp;
	pgcnt_t size_low, size_high;
	struct memseg *seg_low, *seg_mid, *seg_high;

	/*
	 * Lock the memsegs list against other updates now.
	 */
	memsegs_lock(1);

	/*
	 * Find the boot-time memseg that wholly covers this area.
	 */

	/* First find the memseg with page 'base' in it. */
	for (segpp = &memsegs; (seg = *segpp) != NULL;
	    segpp = &((*segpp)->next)) {
		if (base >= seg->pages_base && base < seg->pages_end)
			break;
	}
	if (seg == NULL) {
		memsegs_unlock(1);
		return (0);
	}
	if (memseg_is_dynamic(seg, (pfn_t *)NULL)) {
		memsegs_unlock(1);
		return (0);
	}
	if ((base + npgs) > seg->pages_end) {
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Work out the size of the two segments that will
	 * surround the new segment, one for low addresses
	 * and one for high.
	 */
	ASSERT(base >= seg->pages_base);
	size_low = base - seg->pages_base;
	ASSERT(seg->pages_end >= (base + npgs));
	size_high = seg->pages_end - (base + npgs);

	/*
	 * Sanity check.
	 */
	if ((size_low + size_high) == 0) {
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Allocate the new structures.  The old memseg will not be freed
	 * as there may be a reference to it.
	 */
	seg_low = NULL;
	seg_high = NULL;

	if (size_low != 0) {
		seg_low = kmem_cache_alloc(memseg_cache, KM_SLEEP);
		bzero(seg_low, sizeof (struct memseg));
	}

	seg_mid = kmem_cache_alloc(memseg_cache, KM_SLEEP);
	bzero(seg_mid, sizeof (struct memseg));

	if (size_high != 0) {
		seg_high = kmem_cache_alloc(memseg_cache, KM_SLEEP);
		bzero(seg_high, sizeof (struct memseg));
	}
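
	/*
	 * Worked example (editorial; the numbers are hypothetical):
	 * for a boot-time memseg covering pfns [1000, 2000) split at
	 * base = 1200 with npgs = 300, size_low = 1200 - 1000 = 200
	 * and size_high = 2000 - (1200 + 300) = 500, giving seg_low
	 * [1000, 1200), seg_mid [1200, 1500) and seg_high [1500, 2000).
	 */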

	/*
	 * All allocation done now.
	 */
	if (size_low != 0) {
		seg_low->pages = seg->pages;
		seg_low->epages = seg_low->pages + size_low;
		seg_low->pages_base = seg->pages_base;
		seg_low->pages_end = seg_low->pages_base + size_low;
		seg_low->next = seg_mid;
	}
	if (size_high != 0) {
		seg_high->pages = seg->epages - size_high;
		seg_high->epages = seg_high->pages + size_high;
		seg_high->pages_base = seg->pages_end - size_high;
		seg_high->pages_end = seg_high->pages_base + size_high;
		seg_high->next = seg->next;
	}

	seg_mid->pages = seg->pages + size_low;
	seg_mid->pages_base = seg->pages_base + size_low;
	seg_mid->epages = seg->epages - size_high;
	seg_mid->pages_end = seg->pages_end - size_high;
	seg_mid->next = (seg_high != NULL) ? seg_high : seg->next;

	/*
	 * Update hat_kpm specific info of all involved memsegs and
	 * allow hat_kpm specific global chain updates.
	 */
	hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high);

	/*
	 * At this point we have two equivalent memseg sub-chains,
	 * seg and seg_low/seg_mid/seg_high, which both chain on to
	 * the same place in the global chain.  By rewriting the pointer
	 * in the previous element we switch atomically from using the
	 * old (seg) to the new.
	 */
	*segpp = (seg_low != NULL) ? seg_low : seg_mid;

	membar_enter();

	build_pfn_hash();
	memsegs_unlock(1);

	/*
	 * We leave the old segment, 'seg', intact as there may be
	 * references to it.  Also, as the value of total_pages has not
	 * changed and the memsegs list is effectively the same when
	 * accessed via the old or the new pointer, we do not have to
	 * cause pageout_scanner() to re-evaluate its hand pointers.
	 *
	 * We currently do not re-use or reclaim the page_t memory.
	 * If we do, then this may have to change.
	 */

	mutex_enter(&memseg_lists_lock);
	seg->lnext = memseg_edit_junk;
	memseg_edit_junk = seg;
	mutex_exit(&memseg_lists_lock);

	return (1);
}
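
/*
 * Editorial aside: kphysm_split_memseg() above publishes the fully
 * built sub-chain with a single pointer store (followed in the driver
 * by membar_enter()), so concurrent walkers see either the old chain
 * or the new one, never a half-built mix.  A hypothetical sketch of
 * release-style publication, guarded out of the build:
 */
#if 0
struct node {
	struct node *next;
};

static void
publish_subchain(struct node **prevp, struct node *newchain)
{
	/* newchain and its successors are fully initialized here. */
	__atomic_thread_fence(__ATOMIC_RELEASE);
	*prevp = newchain;
}
#endif
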
/*
 * Some hat layers (e.g. sfmmu) access parts of the memseg structure
 * by physical address.  Therefore a kmem_cache is used with KMC_NOHASH
 * to avoid page crossings within a memseg structure.  KMC_NOHASH
 * requires that no external (outside of slab) information is allowed.
 * This, in turn, implies that the cache's slabsize must be exactly a
 * single page, since per-slab information (e.g. the freelist for the
 * slab) is kept at the end of the slab, where it is easy to locate.
 * This should be changed when a more suitable kmem_cache
 * interface/flag becomes available.
 */
void
mem_config_init(void)
{
	memseg_cache = kmem_cache_create("memseg_cache",
	    sizeof (struct memseg), 0, NULL, NULL, NULL, NULL,
	    static_arena, KMC_NOHASH);
}