10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 50Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 60Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 70Sstevel@tonic-gate * with the License. 80Sstevel@tonic-gate * 90Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 100Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 110Sstevel@tonic-gate * See the License for the specific language governing permissions 120Sstevel@tonic-gate * and limitations under the License. 130Sstevel@tonic-gate * 140Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 150Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 160Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 170Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 180Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 190Sstevel@tonic-gate * 200Sstevel@tonic-gate * CDDL HEADER END 210Sstevel@tonic-gate */ 220Sstevel@tonic-gate /* 230Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 240Sstevel@tonic-gate * Use is subject to license terms. 250Sstevel@tonic-gate */ 260Sstevel@tonic-gate 270Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 280Sstevel@tonic-gate 290Sstevel@tonic-gate #include <sys/types.h> 300Sstevel@tonic-gate #include <sys/cmn_err.h> 310Sstevel@tonic-gate #include <sys/vmem.h> 320Sstevel@tonic-gate #include <sys/kmem.h> 330Sstevel@tonic-gate #include <sys/systm.h> 340Sstevel@tonic-gate #include <sys/machsystm.h> /* for page_freelist_coalesce() */ 350Sstevel@tonic-gate #include <sys/errno.h> 360Sstevel@tonic-gate #include <sys/memnode.h> 370Sstevel@tonic-gate #include <sys/memlist.h> 380Sstevel@tonic-gate #include <sys/memlist_impl.h> 390Sstevel@tonic-gate #include <sys/tuneable.h> 400Sstevel@tonic-gate #include <sys/proc.h> 410Sstevel@tonic-gate #include <sys/disp.h> 420Sstevel@tonic-gate #include <sys/debug.h> 430Sstevel@tonic-gate #include <sys/vm.h> 440Sstevel@tonic-gate #include <sys/callb.h> 450Sstevel@tonic-gate #include <sys/memlist_plat.h> /* for installed_top_size() */ 460Sstevel@tonic-gate #include <sys/condvar_impl.h> /* for CV_HAS_WAITERS() */ 470Sstevel@tonic-gate #include <sys/dumphdr.h> /* for dump_resize() */ 480Sstevel@tonic-gate #include <sys/atomic.h> /* for use in stats collection */ 490Sstevel@tonic-gate #include <sys/rwlock.h> 500Sstevel@tonic-gate #include <sys/cpuvar.h> 510Sstevel@tonic-gate #include <vm/seg_kmem.h> 520Sstevel@tonic-gate #include <vm/seg_kpm.h> 530Sstevel@tonic-gate #include <vm/page.h> 540Sstevel@tonic-gate #define SUNDDI_IMPL /* so sunddi.h will not redefine splx() et al */ 550Sstevel@tonic-gate #include <sys/sunddi.h> 560Sstevel@tonic-gate #include <sys/mem_config.h> 570Sstevel@tonic-gate #include <sys/mem_cage.h> 580Sstevel@tonic-gate #include <sys/lgrp.h> 590Sstevel@tonic-gate #include <sys/ddi.h> 600Sstevel@tonic-gate #include <sys/modctl.h> 610Sstevel@tonic-gate 620Sstevel@tonic-gate extern void memlist_read_lock(void); 630Sstevel@tonic-gate extern void memlist_read_unlock(void); 640Sstevel@tonic-gate extern void memlist_write_lock(void); 650Sstevel@tonic-gate extern void memlist_write_unlock(void); 660Sstevel@tonic-gate 670Sstevel@tonic-gate extern struct memlist *phys_avail; 680Sstevel@tonic-gate 690Sstevel@tonic-gate extern void mem_node_add(pfn_t, pfn_t); 700Sstevel@tonic-gate extern void mem_node_del(pfn_t, pfn_t); 710Sstevel@tonic-gate 720Sstevel@tonic-gate extern uint_t page_ctrs_adjust(int); 730Sstevel@tonic-gate static void kphysm_setup_post_add(pgcnt_t); 740Sstevel@tonic-gate static int kphysm_setup_pre_del(pgcnt_t); 750Sstevel@tonic-gate static void kphysm_setup_post_del(pgcnt_t, int); 760Sstevel@tonic-gate 770Sstevel@tonic-gate static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs); 780Sstevel@tonic-gate 790Sstevel@tonic-gate static int delspan_reserve(pfn_t, pgcnt_t); 800Sstevel@tonic-gate static void delspan_unreserve(pfn_t, pgcnt_t); 810Sstevel@tonic-gate 820Sstevel@tonic-gate static kmutex_t memseg_lists_lock; 830Sstevel@tonic-gate static struct memseg *memseg_va_avail; 840Sstevel@tonic-gate static struct memseg *memseg_delete_junk; 850Sstevel@tonic-gate static struct memseg *memseg_edit_junk; 860Sstevel@tonic-gate void memseg_remap_init(void); 870Sstevel@tonic-gate static void memseg_remap_to_dummy(caddr_t, pgcnt_t); 880Sstevel@tonic-gate static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t); 890Sstevel@tonic-gate static struct memseg *memseg_reuse(pgcnt_t); 900Sstevel@tonic-gate 910Sstevel@tonic-gate static struct kmem_cache *memseg_cache; 920Sstevel@tonic-gate 930Sstevel@tonic-gate /* 940Sstevel@tonic-gate * Add a chunk of memory to the system. page_t's for this memory 950Sstevel@tonic-gate * are allocated in the first few pages of the chunk. 960Sstevel@tonic-gate * base: starting PAGESIZE page of new memory. 970Sstevel@tonic-gate * npgs: length in PAGESIZE pages. 980Sstevel@tonic-gate * 990Sstevel@tonic-gate * Adding mem this way doesn't increase the size of the hash tables; 1000Sstevel@tonic-gate * growing them would be too hard. This should be OK, but adding memory 1010Sstevel@tonic-gate * dynamically most likely means more hash misses, since the tables will 1020Sstevel@tonic-gate * be smaller than they otherwise would be. 1030Sstevel@tonic-gate */ 1040Sstevel@tonic-gate int 1050Sstevel@tonic-gate kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs) 1060Sstevel@tonic-gate { 1070Sstevel@tonic-gate page_t *pp; 1080Sstevel@tonic-gate page_t *opp, *oepp; 1090Sstevel@tonic-gate struct memseg *seg; 1100Sstevel@tonic-gate uint64_t avmem; 1110Sstevel@tonic-gate pfn_t pfn; 1120Sstevel@tonic-gate pfn_t pt_base = base; 1130Sstevel@tonic-gate pgcnt_t tpgs = npgs; 1140Sstevel@tonic-gate pgcnt_t metapgs; 1150Sstevel@tonic-gate int exhausted; 1160Sstevel@tonic-gate pfn_t pnum; 1170Sstevel@tonic-gate int mnode; 1180Sstevel@tonic-gate caddr_t vaddr; 1190Sstevel@tonic-gate int reuse; 1200Sstevel@tonic-gate int mlret; 1210Sstevel@tonic-gate void *mapva; 1220Sstevel@tonic-gate pgcnt_t nkpmpgs = 0; 1230Sstevel@tonic-gate offset_t kpm_pages_off; 1240Sstevel@tonic-gate 1250Sstevel@tonic-gate cmn_err(CE_CONT, 1260Sstevel@tonic-gate "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n", 1270Sstevel@tonic-gate npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT); 1280Sstevel@tonic-gate 1290Sstevel@tonic-gate /* 1300Sstevel@tonic-gate * Add this span in the delete list to prevent interactions. 1310Sstevel@tonic-gate */ 1320Sstevel@tonic-gate if (!delspan_reserve(base, npgs)) { 1330Sstevel@tonic-gate return (KPHYSM_ESPAN); 1340Sstevel@tonic-gate } 1350Sstevel@tonic-gate /* 1360Sstevel@tonic-gate * Check to see if any of the memory span has been added 1370Sstevel@tonic-gate * by trying an add to the installed memory list. This 1380Sstevel@tonic-gate * forms the interlocking process for add. 1390Sstevel@tonic-gate */ 1400Sstevel@tonic-gate 1410Sstevel@tonic-gate memlist_write_lock(); 1420Sstevel@tonic-gate 1430Sstevel@tonic-gate mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT, 1440Sstevel@tonic-gate (uint64_t)(tpgs) << PAGESHIFT, &phys_install); 1450Sstevel@tonic-gate 1460Sstevel@tonic-gate if (mlret == MEML_SPANOP_OK) 1470Sstevel@tonic-gate installed_top_size(phys_install, &physmax, &physinstalled); 1480Sstevel@tonic-gate 1490Sstevel@tonic-gate memlist_write_unlock(); 1500Sstevel@tonic-gate 1510Sstevel@tonic-gate if (mlret != MEML_SPANOP_OK) { 1520Sstevel@tonic-gate if (mlret == MEML_SPANOP_EALLOC) { 1530Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs); 1540Sstevel@tonic-gate return (KPHYSM_ERESOURCE); 1550Sstevel@tonic-gate } else 1560Sstevel@tonic-gate if (mlret == MEML_SPANOP_ESPAN) { 1570Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs); 1580Sstevel@tonic-gate return (KPHYSM_ESPAN); 1590Sstevel@tonic-gate } else { 1600Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs); 1610Sstevel@tonic-gate return (KPHYSM_ERESOURCE); 1620Sstevel@tonic-gate } 1630Sstevel@tonic-gate } 1640Sstevel@tonic-gate 1650Sstevel@tonic-gate /* 1660Sstevel@tonic-gate * We store the page_t's for this new memory in the first 1670Sstevel@tonic-gate * few pages of the chunk. Here, we go and get'em ... 1680Sstevel@tonic-gate */ 1690Sstevel@tonic-gate 1700Sstevel@tonic-gate /* 1710Sstevel@tonic-gate * The expression after the '-' gives the number of pages 1720Sstevel@tonic-gate * that will fit in the new memory based on a requirement 1730Sstevel@tonic-gate * of (PAGESIZE + sizeof (page_t)) bytes per page. 1740Sstevel@tonic-gate */ 1750Sstevel@tonic-gate metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) / 1760Sstevel@tonic-gate (PAGESIZE + sizeof (page_t))); 1770Sstevel@tonic-gate 1780Sstevel@tonic-gate npgs -= metapgs; 1790Sstevel@tonic-gate base += metapgs; 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs); 1820Sstevel@tonic-gate 1830Sstevel@tonic-gate exhausted = (metapgs == 0 || npgs == 0); 1840Sstevel@tonic-gate 1850Sstevel@tonic-gate if (kpm_enable && !exhausted) { 1860Sstevel@tonic-gate pgcnt_t start, end, nkpmpgs_prelim; 1870Sstevel@tonic-gate size_t ptsz; 1880Sstevel@tonic-gate 1890Sstevel@tonic-gate /* 1900Sstevel@tonic-gate * A viable kpm large page mapping must not overlap two 1910Sstevel@tonic-gate * dynamic memsegs. Therefore the total size is checked 1920Sstevel@tonic-gate * to be at least kpm_pgsz and also whether start and end 1930Sstevel@tonic-gate * points are at least kpm_pgsz aligned. 1940Sstevel@tonic-gate */ 1950Sstevel@tonic-gate if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) || 1960Sstevel@tonic-gate pmodkpmp(base + npgs)) { 1970Sstevel@tonic-gate 1980Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs); 1990Sstevel@tonic-gate 2000Sstevel@tonic-gate /* 2010Sstevel@tonic-gate * There is no specific error code for violating 2020Sstevel@tonic-gate * kpm granularity constraints. 2030Sstevel@tonic-gate */ 2040Sstevel@tonic-gate return (KPHYSM_ENOTVIABLE); 2050Sstevel@tonic-gate } 2060Sstevel@tonic-gate 2070Sstevel@tonic-gate start = kpmptop(ptokpmp(base)); 2080Sstevel@tonic-gate end = kpmptop(ptokpmp(base + npgs)); 2090Sstevel@tonic-gate nkpmpgs_prelim = ptokpmp(end - start); 2100Sstevel@tonic-gate ptsz = npgs * sizeof (page_t); 2110Sstevel@tonic-gate metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ); 2120Sstevel@tonic-gate exhausted = (tpgs <= metapgs); 2130Sstevel@tonic-gate if (!exhausted) { 2140Sstevel@tonic-gate npgs = tpgs - metapgs; 2150Sstevel@tonic-gate base = pt_base + metapgs; 2160Sstevel@tonic-gate 2170Sstevel@tonic-gate /* final nkpmpgs */ 2180Sstevel@tonic-gate start = kpmptop(ptokpmp(base)); 2190Sstevel@tonic-gate nkpmpgs = ptokpmp(end - start); 2200Sstevel@tonic-gate kpm_pages_off = ptsz + 2210Sstevel@tonic-gate (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ; 2220Sstevel@tonic-gate } 2230Sstevel@tonic-gate } 2240Sstevel@tonic-gate 2250Sstevel@tonic-gate /* 2260Sstevel@tonic-gate * Is memory area supplied too small? 2270Sstevel@tonic-gate */ 2280Sstevel@tonic-gate if (exhausted) { 2290Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs); 2300Sstevel@tonic-gate 2310Sstevel@tonic-gate /* 2320Sstevel@tonic-gate * There is no specific error code for 'too small'. 2330Sstevel@tonic-gate */ 2340Sstevel@tonic-gate return (KPHYSM_ERESOURCE); 2350Sstevel@tonic-gate } 2360Sstevel@tonic-gate 2370Sstevel@tonic-gate /* 2380Sstevel@tonic-gate * We may re-use a previously allocated VA space for the page_ts 2390Sstevel@tonic-gate * eventually, but we need to initialize and lock the pages first. 2400Sstevel@tonic-gate */ 2410Sstevel@tonic-gate 2420Sstevel@tonic-gate /* 2430Sstevel@tonic-gate * Get an address in the kernel address map, map 2440Sstevel@tonic-gate * the page_t pages and see if we can touch them. 2450Sstevel@tonic-gate */ 2460Sstevel@tonic-gate 2470Sstevel@tonic-gate mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP); 2480Sstevel@tonic-gate if (mapva == NULL) { 2490Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_add_memory_dynamic:" 2500Sstevel@tonic-gate " Can't allocate VA for page_ts"); 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs); 2530Sstevel@tonic-gate 2540Sstevel@tonic-gate return (KPHYSM_ERESOURCE); 2550Sstevel@tonic-gate } 2560Sstevel@tonic-gate pp = mapva; 2570Sstevel@tonic-gate 2580Sstevel@tonic-gate if (physmax < (pt_base + tpgs)) 2590Sstevel@tonic-gate physmax = (pt_base + tpgs); 2600Sstevel@tonic-gate 2610Sstevel@tonic-gate /* 2620Sstevel@tonic-gate * In the remapping code we map one page at a time so we must do 2630Sstevel@tonic-gate * the same here to match mapping sizes. 2640Sstevel@tonic-gate */ 2650Sstevel@tonic-gate pfn = pt_base; 2660Sstevel@tonic-gate vaddr = (caddr_t)pp; 2670Sstevel@tonic-gate for (pnum = 0; pnum < metapgs; pnum++) { 2680Sstevel@tonic-gate hat_devload(kas.a_hat, vaddr, ptob(1), pfn, 2690Sstevel@tonic-gate PROT_READ | PROT_WRITE, 2700Sstevel@tonic-gate HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST); 2710Sstevel@tonic-gate pfn++; 2720Sstevel@tonic-gate vaddr += ptob(1); 2730Sstevel@tonic-gate } 2740Sstevel@tonic-gate 2750Sstevel@tonic-gate if (ddi_peek32((dev_info_t *)NULL, 2760Sstevel@tonic-gate (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) { 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate cmn_err(CE_PANIC, "kphysm_add_memory_dynamic:" 2790Sstevel@tonic-gate " Can't access pp array at 0x%p [phys 0x%lx]", 2800Sstevel@tonic-gate (void *)pp, pt_base); 2810Sstevel@tonic-gate 2820Sstevel@tonic-gate hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs), 2830Sstevel@tonic-gate HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK); 2840Sstevel@tonic-gate 2850Sstevel@tonic-gate vmem_free(heap_arena, mapva, ptob(metapgs)); 2860Sstevel@tonic-gate 2870Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs); 2880Sstevel@tonic-gate 2890Sstevel@tonic-gate return (KPHYSM_EFAULT); 2900Sstevel@tonic-gate } 2910Sstevel@tonic-gate 2920Sstevel@tonic-gate /* 2930Sstevel@tonic-gate * Add this memory slice to its memory node translation. 2940Sstevel@tonic-gate * 2950Sstevel@tonic-gate * Note that right now, each node may have only one slice; 2960Sstevel@tonic-gate * this may change with COD or in larger SSM systems with 2970Sstevel@tonic-gate * nested latency groups, so we must not assume that the 2980Sstevel@tonic-gate * node does not yet exist. 2990Sstevel@tonic-gate */ 3000Sstevel@tonic-gate pnum = base + npgs - 1; 3010Sstevel@tonic-gate mem_node_add_slice(base, pnum); 3020Sstevel@tonic-gate 3030Sstevel@tonic-gate /* 3040Sstevel@tonic-gate * Allocate or resize page counters as necessary to accomodate 3050Sstevel@tonic-gate * the increase in memory pages. 3060Sstevel@tonic-gate */ 3070Sstevel@tonic-gate mnode = PFN_2_MEM_NODE(pnum); 3080Sstevel@tonic-gate if (page_ctrs_adjust(mnode) != 0) { 3090Sstevel@tonic-gate 3100Sstevel@tonic-gate mem_node_pre_del_slice(base, pnum); 3110Sstevel@tonic-gate mem_node_post_del_slice(base, pnum, 0); 3120Sstevel@tonic-gate 3130Sstevel@tonic-gate hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs), 3140Sstevel@tonic-gate HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK); 3150Sstevel@tonic-gate 3160Sstevel@tonic-gate vmem_free(heap_arena, mapva, ptob(metapgs)); 3170Sstevel@tonic-gate 3180Sstevel@tonic-gate kphysm_addmem_error_undospan(pt_base, tpgs); 3190Sstevel@tonic-gate 3200Sstevel@tonic-gate return (KPHYSM_ERESOURCE); 3210Sstevel@tonic-gate } 3220Sstevel@tonic-gate 3230Sstevel@tonic-gate /* 3240Sstevel@tonic-gate * Update the phys_avail memory list. 3250Sstevel@tonic-gate * The phys_install list was done at the start. 3260Sstevel@tonic-gate */ 3270Sstevel@tonic-gate 3280Sstevel@tonic-gate memlist_write_lock(); 3290Sstevel@tonic-gate 3300Sstevel@tonic-gate mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT, 3310Sstevel@tonic-gate (uint64_t)(npgs) << PAGESHIFT, &phys_avail); 3320Sstevel@tonic-gate ASSERT(mlret == MEML_SPANOP_OK); 3330Sstevel@tonic-gate 3340Sstevel@tonic-gate memlist_write_unlock(); 3350Sstevel@tonic-gate 3360Sstevel@tonic-gate /* See if we can find a memseg to re-use. */ 3370Sstevel@tonic-gate seg = memseg_reuse(metapgs); 3380Sstevel@tonic-gate 3390Sstevel@tonic-gate reuse = (seg != NULL); 3400Sstevel@tonic-gate 3410Sstevel@tonic-gate /* 3420Sstevel@tonic-gate * Initialize the memseg structure representing this memory 3430Sstevel@tonic-gate * and add it to the existing list of memsegs. Do some basic 3440Sstevel@tonic-gate * initialization and add the memory to the system. 3450Sstevel@tonic-gate * In order to prevent lock deadlocks, the add_physmem() 3460Sstevel@tonic-gate * code is repeated here, but split into several stages. 3470Sstevel@tonic-gate */ 3480Sstevel@tonic-gate if (seg == NULL) { 3490Sstevel@tonic-gate seg = kmem_cache_alloc(memseg_cache, KM_SLEEP); 3500Sstevel@tonic-gate bzero(seg, sizeof (struct memseg)); 3510Sstevel@tonic-gate seg->msegflags = MEMSEG_DYNAMIC; 3520Sstevel@tonic-gate seg->pages = pp; 3530Sstevel@tonic-gate } else { 3540Sstevel@tonic-gate /*EMPTY*/ 3550Sstevel@tonic-gate ASSERT(seg->msegflags & MEMSEG_DYNAMIC); 3560Sstevel@tonic-gate } 3570Sstevel@tonic-gate 3580Sstevel@tonic-gate seg->epages = seg->pages + npgs; 3590Sstevel@tonic-gate seg->pages_base = base; 3600Sstevel@tonic-gate seg->pages_end = base + npgs; 3610Sstevel@tonic-gate 3620Sstevel@tonic-gate /* 3630Sstevel@tonic-gate * Initialize metadata. The page_ts are set to locked state 3640Sstevel@tonic-gate * ready to be freed. 3650Sstevel@tonic-gate */ 3660Sstevel@tonic-gate bzero((caddr_t)pp, ptob(metapgs)); 3670Sstevel@tonic-gate 3680Sstevel@tonic-gate pfn = seg->pages_base; 3690Sstevel@tonic-gate /* Save the original pp base in case we reuse a memseg. */ 3700Sstevel@tonic-gate opp = pp; 3710Sstevel@tonic-gate oepp = opp + npgs; 3720Sstevel@tonic-gate for (pp = opp; pp < oepp; pp++) { 3730Sstevel@tonic-gate pp->p_pagenum = pfn; 3740Sstevel@tonic-gate pfn++; 3750Sstevel@tonic-gate page_iolock_init(pp); 3760Sstevel@tonic-gate while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM)) 3770Sstevel@tonic-gate continue; 3780Sstevel@tonic-gate pp->p_offset = (u_offset_t)-1; 3790Sstevel@tonic-gate } 3800Sstevel@tonic-gate 3810Sstevel@tonic-gate if (reuse) { 3820Sstevel@tonic-gate /* Remap our page_ts to the re-used memseg VA space. */ 3830Sstevel@tonic-gate pfn = pt_base; 3840Sstevel@tonic-gate vaddr = (caddr_t)seg->pages; 3850Sstevel@tonic-gate for (pnum = 0; pnum < metapgs; pnum++) { 3860Sstevel@tonic-gate hat_devload(kas.a_hat, vaddr, ptob(1), pfn, 3870Sstevel@tonic-gate PROT_READ | PROT_WRITE, 3880Sstevel@tonic-gate HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST); 3890Sstevel@tonic-gate pfn++; 3900Sstevel@tonic-gate vaddr += ptob(1); 3910Sstevel@tonic-gate } 3920Sstevel@tonic-gate 3930Sstevel@tonic-gate hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs), 3940Sstevel@tonic-gate HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK); 3950Sstevel@tonic-gate 3960Sstevel@tonic-gate vmem_free(heap_arena, mapva, ptob(metapgs)); 3970Sstevel@tonic-gate } 3980Sstevel@tonic-gate 3990Sstevel@tonic-gate hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off); 4000Sstevel@tonic-gate 4010Sstevel@tonic-gate memsegs_lock(1); 4020Sstevel@tonic-gate 4030Sstevel@tonic-gate /* 4040Sstevel@tonic-gate * The new memseg is inserted at the beginning of the list. 4050Sstevel@tonic-gate * Not only does this save searching for the tail, but in the 4060Sstevel@tonic-gate * case of a re-used memseg, it solves the problem of what 4070Sstevel@tonic-gate * happens of some process has still got a pointer to the 4080Sstevel@tonic-gate * memseg and follows the next pointer to continue traversing 4090Sstevel@tonic-gate * the memsegs list. 4100Sstevel@tonic-gate */ 4110Sstevel@tonic-gate 4120Sstevel@tonic-gate hat_kpm_addmem_mseg_insert(seg); 4130Sstevel@tonic-gate 4140Sstevel@tonic-gate seg->next = memsegs; 4150Sstevel@tonic-gate membar_producer(); 4160Sstevel@tonic-gate 4170Sstevel@tonic-gate hat_kpm_addmem_memsegs_update(seg); 4180Sstevel@tonic-gate 4190Sstevel@tonic-gate memsegs = seg; 4200Sstevel@tonic-gate 4210Sstevel@tonic-gate build_pfn_hash(); 4220Sstevel@tonic-gate 4230Sstevel@tonic-gate total_pages += npgs; 4240Sstevel@tonic-gate 4250Sstevel@tonic-gate /* 4260Sstevel@tonic-gate * Recalculate the paging parameters now total_pages has changed. 4270Sstevel@tonic-gate * This will also cause the clock hands to be reset before next use. 4280Sstevel@tonic-gate */ 4290Sstevel@tonic-gate setupclock(1); 4300Sstevel@tonic-gate 4310Sstevel@tonic-gate memsegs_unlock(1); 4320Sstevel@tonic-gate 4330Sstevel@tonic-gate /* 4340Sstevel@tonic-gate * Free the pages outside the lock to avoid locking loops. 4350Sstevel@tonic-gate */ 4360Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) { 4370Sstevel@tonic-gate page_free(pp, 1); 4380Sstevel@tonic-gate } 4390Sstevel@tonic-gate 4400Sstevel@tonic-gate /* 4410Sstevel@tonic-gate * Now that we've updated the appropriate memory lists we 4420Sstevel@tonic-gate * need to reset a number of globals, since we've increased memory. 4430Sstevel@tonic-gate * Several have already been updated for us as noted above. The 4440Sstevel@tonic-gate * globals we're interested in at this point are: 4450Sstevel@tonic-gate * physmax - highest page frame number. 4460Sstevel@tonic-gate * physinstalled - number of pages currently installed (done earlier) 4470Sstevel@tonic-gate * maxmem - max free pages in the system 4480Sstevel@tonic-gate * physmem - physical memory pages available 4490Sstevel@tonic-gate * availrmem - real memory available 4500Sstevel@tonic-gate */ 4510Sstevel@tonic-gate 4520Sstevel@tonic-gate mutex_enter(&freemem_lock); 4530Sstevel@tonic-gate maxmem += npgs; 4540Sstevel@tonic-gate physmem += npgs; 4550Sstevel@tonic-gate availrmem += npgs; 4560Sstevel@tonic-gate availrmem_initial += npgs; 4570Sstevel@tonic-gate 4580Sstevel@tonic-gate mutex_exit(&freemem_lock); 4590Sstevel@tonic-gate 4600Sstevel@tonic-gate dump_resize(); 4610Sstevel@tonic-gate 4620Sstevel@tonic-gate page_freelist_coalesce_all(mnode); 4630Sstevel@tonic-gate 4640Sstevel@tonic-gate kphysm_setup_post_add(npgs); 4650Sstevel@tonic-gate 4660Sstevel@tonic-gate cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK " 4670Sstevel@tonic-gate "(0x%" PRIx64 ")\n", 4680Sstevel@tonic-gate physinstalled << (PAGESHIFT - 10), 4690Sstevel@tonic-gate (uint64_t)physinstalled << PAGESHIFT); 4700Sstevel@tonic-gate 4710Sstevel@tonic-gate avmem = (uint64_t)freemem << PAGESHIFT; 4720Sstevel@tonic-gate cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: " 4730Sstevel@tonic-gate "avail mem = %" PRId64 "\n", avmem); 4740Sstevel@tonic-gate 4750Sstevel@tonic-gate /* 4760Sstevel@tonic-gate * Update lgroup generation number on single lgroup systems 4770Sstevel@tonic-gate */ 4780Sstevel@tonic-gate if (nlgrps == 1) 4790Sstevel@tonic-gate lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0); 4800Sstevel@tonic-gate 4810Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs); 4820Sstevel@tonic-gate return (KPHYSM_OK); /* Successfully added system memory */ 4830Sstevel@tonic-gate 4840Sstevel@tonic-gate } 4850Sstevel@tonic-gate 4860Sstevel@tonic-gate /* 4870Sstevel@tonic-gate * There are various error conditions in kphysm_add_memory_dynamic() 4880Sstevel@tonic-gate * which require a rollback of already changed global state. 4890Sstevel@tonic-gate */ 4900Sstevel@tonic-gate static void 4910Sstevel@tonic-gate kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs) 4920Sstevel@tonic-gate { 4930Sstevel@tonic-gate int mlret; 4940Sstevel@tonic-gate 4950Sstevel@tonic-gate /* Unreserve memory span. */ 4960Sstevel@tonic-gate memlist_write_lock(); 4970Sstevel@tonic-gate 4980Sstevel@tonic-gate mlret = memlist_delete_span( 4990Sstevel@tonic-gate (uint64_t)(pt_base) << PAGESHIFT, 5000Sstevel@tonic-gate (uint64_t)(tpgs) << PAGESHIFT, &phys_install); 5010Sstevel@tonic-gate 5020Sstevel@tonic-gate ASSERT(mlret == MEML_SPANOP_OK); 5030Sstevel@tonic-gate phys_install_has_changed(); 5040Sstevel@tonic-gate installed_top_size(phys_install, &physmax, &physinstalled); 5050Sstevel@tonic-gate 5060Sstevel@tonic-gate memlist_write_unlock(); 5070Sstevel@tonic-gate delspan_unreserve(pt_base, tpgs); 5080Sstevel@tonic-gate } 5090Sstevel@tonic-gate 5100Sstevel@tonic-gate /* 5110Sstevel@tonic-gate * Only return an available memseg of exactly the right size. 5120Sstevel@tonic-gate * When the meta data area has it's own virtual address space 5130Sstevel@tonic-gate * we will need to manage this more carefully and do best fit 5140Sstevel@tonic-gate * allocations, possibly splitting an availble area. 5150Sstevel@tonic-gate */ 5160Sstevel@tonic-gate static struct memseg * 5170Sstevel@tonic-gate memseg_reuse(pgcnt_t metapgs) 5180Sstevel@tonic-gate { 5190Sstevel@tonic-gate struct memseg **segpp, *seg; 5200Sstevel@tonic-gate 5210Sstevel@tonic-gate mutex_enter(&memseg_lists_lock); 5220Sstevel@tonic-gate 5230Sstevel@tonic-gate segpp = &memseg_va_avail; 5240Sstevel@tonic-gate for (; (seg = *segpp) != NULL; segpp = &seg->lnext) { 5250Sstevel@tonic-gate caddr_t end; 5260Sstevel@tonic-gate 5270Sstevel@tonic-gate if (kpm_enable) 5280Sstevel@tonic-gate end = hat_kpm_mseg_reuse(seg); 5290Sstevel@tonic-gate else 5300Sstevel@tonic-gate end = (caddr_t)seg->epages; 5310Sstevel@tonic-gate 5320Sstevel@tonic-gate if (btopr(end - (caddr_t)seg->pages) == metapgs) { 5330Sstevel@tonic-gate *segpp = seg->lnext; 5340Sstevel@tonic-gate seg->lnext = NULL; 5350Sstevel@tonic-gate break; 5360Sstevel@tonic-gate } 5370Sstevel@tonic-gate } 5380Sstevel@tonic-gate mutex_exit(&memseg_lists_lock); 5390Sstevel@tonic-gate 5400Sstevel@tonic-gate return (seg); 5410Sstevel@tonic-gate } 5420Sstevel@tonic-gate 5430Sstevel@tonic-gate static uint_t handle_gen; 5440Sstevel@tonic-gate 5450Sstevel@tonic-gate struct memdelspan { 5460Sstevel@tonic-gate struct memdelspan *mds_next; 5470Sstevel@tonic-gate pfn_t mds_base; 5480Sstevel@tonic-gate pgcnt_t mds_npgs; 5490Sstevel@tonic-gate uint_t *mds_bitmap; 5500Sstevel@tonic-gate uint_t *mds_bitmap_retired; 5510Sstevel@tonic-gate }; 5520Sstevel@tonic-gate 5530Sstevel@tonic-gate #define NBPBMW (sizeof (uint_t) * NBBY) 5540Sstevel@tonic-gate #define MDS_BITMAPBYTES(MDSP) \ 5550Sstevel@tonic-gate ((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t)) 5560Sstevel@tonic-gate 5570Sstevel@tonic-gate struct transit_list { 5580Sstevel@tonic-gate struct transit_list *trl_next; 5590Sstevel@tonic-gate struct memdelspan *trl_spans; 5600Sstevel@tonic-gate int trl_collect; 5610Sstevel@tonic-gate }; 5620Sstevel@tonic-gate 5630Sstevel@tonic-gate struct transit_list_head { 5640Sstevel@tonic-gate kmutex_t trh_lock; 5650Sstevel@tonic-gate struct transit_list *trh_head; 5660Sstevel@tonic-gate }; 5670Sstevel@tonic-gate 5680Sstevel@tonic-gate static struct transit_list_head transit_list_head; 5690Sstevel@tonic-gate 5700Sstevel@tonic-gate struct mem_handle; 5710Sstevel@tonic-gate static void transit_list_collect(struct mem_handle *, int); 5720Sstevel@tonic-gate static void transit_list_insert(struct transit_list *); 5730Sstevel@tonic-gate static void transit_list_remove(struct transit_list *); 5740Sstevel@tonic-gate 5750Sstevel@tonic-gate #ifdef DEBUG 5760Sstevel@tonic-gate #define MEM_DEL_STATS 5770Sstevel@tonic-gate #endif /* DEBUG */ 5780Sstevel@tonic-gate 5790Sstevel@tonic-gate #ifdef MEM_DEL_STATS 5800Sstevel@tonic-gate static int mem_del_stat_print = 0; 5810Sstevel@tonic-gate struct mem_del_stat { 5820Sstevel@tonic-gate uint_t nloop; 5830Sstevel@tonic-gate uint_t need_free; 5840Sstevel@tonic-gate uint_t free_loop; 5850Sstevel@tonic-gate uint_t free_low; 5860Sstevel@tonic-gate uint_t free_failed; 5870Sstevel@tonic-gate uint_t ncheck; 5880Sstevel@tonic-gate uint_t nopaget; 5890Sstevel@tonic-gate uint_t lockfail; 5900Sstevel@tonic-gate uint_t nfree; 5910Sstevel@tonic-gate uint_t nreloc; 5920Sstevel@tonic-gate uint_t nrelocfail; 5930Sstevel@tonic-gate uint_t already_done; 5940Sstevel@tonic-gate uint_t first_notfree; 5950Sstevel@tonic-gate uint_t npplocked; 5960Sstevel@tonic-gate uint_t nlockreloc; 5970Sstevel@tonic-gate uint_t nnorepl; 5980Sstevel@tonic-gate uint_t nmodreloc; 5990Sstevel@tonic-gate uint_t ndestroy; 6000Sstevel@tonic-gate uint_t nputpage; 6010Sstevel@tonic-gate uint_t nnoreclaim; 6020Sstevel@tonic-gate uint_t ndelay; 6030Sstevel@tonic-gate uint_t demotefail; 6040Sstevel@tonic-gate uint64_t nticks_total; 6050Sstevel@tonic-gate uint64_t nticks_pgrp; 6060Sstevel@tonic-gate uint_t retired; 6070Sstevel@tonic-gate uint_t toxic; 6080Sstevel@tonic-gate uint_t failing; 6090Sstevel@tonic-gate uint_t modtoxic; 6100Sstevel@tonic-gate uint_t npplkdtoxic; 6110Sstevel@tonic-gate uint_t gptlmodfail; 6120Sstevel@tonic-gate uint_t gptllckfail; 6130Sstevel@tonic-gate }; 6140Sstevel@tonic-gate /* 6150Sstevel@tonic-gate * The stat values are only incremented in the delete thread 6160Sstevel@tonic-gate * so no locking or atomic required. 6170Sstevel@tonic-gate */ 6180Sstevel@tonic-gate #define MDSTAT_INCR(MHP, FLD) (MHP)->mh_delstat.FLD++ 6190Sstevel@tonic-gate #define MDSTAT_TOTAL(MHP, ntck) ((MHP)->mh_delstat.nticks_total += (ntck)) 6200Sstevel@tonic-gate #define MDSTAT_PGRP(MHP, ntck) ((MHP)->mh_delstat.nticks_pgrp += (ntck)) 6210Sstevel@tonic-gate static void mem_del_stat_print_func(struct mem_handle *); 6220Sstevel@tonic-gate #define MDSTAT_PRINT(MHP) mem_del_stat_print_func((MHP)) 6230Sstevel@tonic-gate #else /* MEM_DEL_STATS */ 6240Sstevel@tonic-gate #define MDSTAT_INCR(MHP, FLD) 6250Sstevel@tonic-gate #define MDSTAT_TOTAL(MHP, ntck) 6260Sstevel@tonic-gate #define MDSTAT_PGRP(MHP, ntck) 6270Sstevel@tonic-gate #define MDSTAT_PRINT(MHP) 6280Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 6290Sstevel@tonic-gate 6300Sstevel@tonic-gate typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING, 6310Sstevel@tonic-gate MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t; 6320Sstevel@tonic-gate 6330Sstevel@tonic-gate /* 6340Sstevel@tonic-gate * mh_mutex must be taken to examine or change mh_exthandle and mh_state. 6350Sstevel@tonic-gate * The mutex may not be required for other fields, dependent on mh_state. 6360Sstevel@tonic-gate */ 6370Sstevel@tonic-gate struct mem_handle { 6380Sstevel@tonic-gate kmutex_t mh_mutex; 6390Sstevel@tonic-gate struct mem_handle *mh_next; 6400Sstevel@tonic-gate memhandle_t mh_exthandle; 6410Sstevel@tonic-gate mhnd_state_t mh_state; 6420Sstevel@tonic-gate struct transit_list mh_transit; 6430Sstevel@tonic-gate pgcnt_t mh_phys_pages; 6440Sstevel@tonic-gate pgcnt_t mh_vm_pages; 6450Sstevel@tonic-gate pgcnt_t mh_hold_todo; 6460Sstevel@tonic-gate void (*mh_delete_complete)(void *, int error); 6470Sstevel@tonic-gate void *mh_delete_complete_arg; 6480Sstevel@tonic-gate volatile uint_t mh_cancel; 6490Sstevel@tonic-gate volatile uint_t mh_dr_aio_cleanup_cancel; 6500Sstevel@tonic-gate volatile uint_t mh_aio_cleanup_done; 6510Sstevel@tonic-gate kcondvar_t mh_cv; 6520Sstevel@tonic-gate kthread_id_t mh_thread_id; 6530Sstevel@tonic-gate page_t *mh_deleted; /* link through p_next */ 6540Sstevel@tonic-gate #ifdef MEM_DEL_STATS 6550Sstevel@tonic-gate struct mem_del_stat mh_delstat; 6560Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 6570Sstevel@tonic-gate }; 6580Sstevel@tonic-gate 6590Sstevel@tonic-gate static struct mem_handle *mem_handle_head; 6600Sstevel@tonic-gate static kmutex_t mem_handle_list_mutex; 6610Sstevel@tonic-gate 6620Sstevel@tonic-gate static struct mem_handle * 6630Sstevel@tonic-gate kphysm_allocate_mem_handle() 6640Sstevel@tonic-gate { 6650Sstevel@tonic-gate struct mem_handle *mhp; 6660Sstevel@tonic-gate 6670Sstevel@tonic-gate mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP); 6680Sstevel@tonic-gate mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL); 6690Sstevel@tonic-gate mutex_enter(&mem_handle_list_mutex); 6700Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 6710Sstevel@tonic-gate /* handle_gen is protected by list mutex. */ 672567Sdmick mhp->mh_exthandle = (memhandle_t)(uintptr_t)(++handle_gen); 6730Sstevel@tonic-gate mhp->mh_next = mem_handle_head; 6740Sstevel@tonic-gate mem_handle_head = mhp; 6750Sstevel@tonic-gate mutex_exit(&mem_handle_list_mutex); 6760Sstevel@tonic-gate 6770Sstevel@tonic-gate return (mhp); 6780Sstevel@tonic-gate } 6790Sstevel@tonic-gate 6800Sstevel@tonic-gate static void 6810Sstevel@tonic-gate kphysm_free_mem_handle(struct mem_handle *mhp) 6820Sstevel@tonic-gate { 6830Sstevel@tonic-gate struct mem_handle **mhpp; 6840Sstevel@tonic-gate 6850Sstevel@tonic-gate ASSERT(mutex_owned(&mhp->mh_mutex)); 6860Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_FREE); 6870Sstevel@tonic-gate /* 6880Sstevel@tonic-gate * Exit the mutex to preserve locking order. This is OK 6890Sstevel@tonic-gate * here as once in the FREE state, the handle cannot 6900Sstevel@tonic-gate * be found by a lookup. 6910Sstevel@tonic-gate */ 6920Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 6930Sstevel@tonic-gate 6940Sstevel@tonic-gate mutex_enter(&mem_handle_list_mutex); 6950Sstevel@tonic-gate mhpp = &mem_handle_head; 6960Sstevel@tonic-gate while (*mhpp != NULL && *mhpp != mhp) 6970Sstevel@tonic-gate mhpp = &(*mhpp)->mh_next; 6980Sstevel@tonic-gate ASSERT(*mhpp == mhp); 6990Sstevel@tonic-gate /* 7000Sstevel@tonic-gate * No need to lock the handle (mh_mutex) as only 7010Sstevel@tonic-gate * mh_next changing and this is the only thread that 7020Sstevel@tonic-gate * can be referncing mhp. 7030Sstevel@tonic-gate */ 7040Sstevel@tonic-gate *mhpp = mhp->mh_next; 7050Sstevel@tonic-gate mutex_exit(&mem_handle_list_mutex); 7060Sstevel@tonic-gate 7070Sstevel@tonic-gate mutex_destroy(&mhp->mh_mutex); 7080Sstevel@tonic-gate kmem_free(mhp, sizeof (struct mem_handle)); 7090Sstevel@tonic-gate } 7100Sstevel@tonic-gate 7110Sstevel@tonic-gate /* 7120Sstevel@tonic-gate * This function finds the internal mem_handle corresponding to an 7130Sstevel@tonic-gate * external handle and returns it with the mh_mutex held. 7140Sstevel@tonic-gate */ 7150Sstevel@tonic-gate static struct mem_handle * 7160Sstevel@tonic-gate kphysm_lookup_mem_handle(memhandle_t handle) 7170Sstevel@tonic-gate { 7180Sstevel@tonic-gate struct mem_handle *mhp; 7190Sstevel@tonic-gate 7200Sstevel@tonic-gate mutex_enter(&mem_handle_list_mutex); 7210Sstevel@tonic-gate for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) { 7220Sstevel@tonic-gate if (mhp->mh_exthandle == handle) { 7230Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 7240Sstevel@tonic-gate /* 7250Sstevel@tonic-gate * The state of the handle could have been changed 7260Sstevel@tonic-gate * by kphysm_del_release() while waiting for mh_mutex. 7270Sstevel@tonic-gate */ 7280Sstevel@tonic-gate if (mhp->mh_state == MHND_FREE) { 7290Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 7300Sstevel@tonic-gate continue; 7310Sstevel@tonic-gate } 7320Sstevel@tonic-gate break; 7330Sstevel@tonic-gate } 7340Sstevel@tonic-gate } 7350Sstevel@tonic-gate mutex_exit(&mem_handle_list_mutex); 7360Sstevel@tonic-gate return (mhp); 7370Sstevel@tonic-gate } 7380Sstevel@tonic-gate 7390Sstevel@tonic-gate int 7400Sstevel@tonic-gate kphysm_del_gethandle(memhandle_t *xmhp) 7410Sstevel@tonic-gate { 7420Sstevel@tonic-gate struct mem_handle *mhp; 7430Sstevel@tonic-gate 7440Sstevel@tonic-gate mhp = kphysm_allocate_mem_handle(); 7450Sstevel@tonic-gate /* 7460Sstevel@tonic-gate * The handle is allocated using KM_SLEEP, so cannot fail. 7470Sstevel@tonic-gate * If the implementation is changed, the correct error to return 7480Sstevel@tonic-gate * here would be KPHYSM_ENOHANDLES. 7490Sstevel@tonic-gate */ 7500Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_FREE); 7510Sstevel@tonic-gate mhp->mh_state = MHND_INIT; 7520Sstevel@tonic-gate *xmhp = mhp->mh_exthandle; 7530Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 7540Sstevel@tonic-gate return (KPHYSM_OK); 7550Sstevel@tonic-gate } 7560Sstevel@tonic-gate 7570Sstevel@tonic-gate static int 7580Sstevel@tonic-gate overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2) 7590Sstevel@tonic-gate { 7600Sstevel@tonic-gate pfn_t e1, e2; 7610Sstevel@tonic-gate 7620Sstevel@tonic-gate e1 = b1 + l1; 7630Sstevel@tonic-gate e2 = b2 + l2; 7640Sstevel@tonic-gate 7650Sstevel@tonic-gate return (!(b2 >= e1 || b1 >= e2)); 7660Sstevel@tonic-gate } 7670Sstevel@tonic-gate 7680Sstevel@tonic-gate static int can_remove_pgs(pgcnt_t); 7690Sstevel@tonic-gate 7700Sstevel@tonic-gate static struct memdelspan * 7710Sstevel@tonic-gate span_to_install(pfn_t base, pgcnt_t npgs) 7720Sstevel@tonic-gate { 7730Sstevel@tonic-gate struct memdelspan *mdsp; 7740Sstevel@tonic-gate struct memdelspan *mdsp_new; 7750Sstevel@tonic-gate uint64_t address, size, thislen; 7760Sstevel@tonic-gate struct memlist *mlp; 7770Sstevel@tonic-gate 7780Sstevel@tonic-gate mdsp_new = NULL; 7790Sstevel@tonic-gate 7800Sstevel@tonic-gate address = (uint64_t)base << PAGESHIFT; 7810Sstevel@tonic-gate size = (uint64_t)npgs << PAGESHIFT; 7820Sstevel@tonic-gate while (size != 0) { 7830Sstevel@tonic-gate memlist_read_lock(); 7840Sstevel@tonic-gate for (mlp = phys_install; mlp != NULL; mlp = mlp->next) { 7850Sstevel@tonic-gate if (address >= (mlp->address + mlp->size)) 7860Sstevel@tonic-gate continue; 7870Sstevel@tonic-gate if ((address + size) > mlp->address) 7880Sstevel@tonic-gate break; 7890Sstevel@tonic-gate } 7900Sstevel@tonic-gate if (mlp == NULL) { 7910Sstevel@tonic-gate address += size; 7920Sstevel@tonic-gate size = 0; 7930Sstevel@tonic-gate thislen = 0; 7940Sstevel@tonic-gate } else { 7950Sstevel@tonic-gate if (address < mlp->address) { 7960Sstevel@tonic-gate size -= (mlp->address - address); 7970Sstevel@tonic-gate address = mlp->address; 7980Sstevel@tonic-gate } 7990Sstevel@tonic-gate ASSERT(address >= mlp->address); 8000Sstevel@tonic-gate if ((address + size) > (mlp->address + mlp->size)) { 8010Sstevel@tonic-gate thislen = mlp->size - (address - mlp->address); 8020Sstevel@tonic-gate } else { 8030Sstevel@tonic-gate thislen = size; 8040Sstevel@tonic-gate } 8050Sstevel@tonic-gate } 8060Sstevel@tonic-gate memlist_read_unlock(); 8070Sstevel@tonic-gate /* TODO: phys_install could change now */ 8080Sstevel@tonic-gate if (thislen == 0) 8090Sstevel@tonic-gate continue; 8100Sstevel@tonic-gate mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP); 8110Sstevel@tonic-gate mdsp->mds_base = btop(address); 8120Sstevel@tonic-gate mdsp->mds_npgs = btop(thislen); 8130Sstevel@tonic-gate mdsp->mds_next = mdsp_new; 8140Sstevel@tonic-gate mdsp_new = mdsp; 8150Sstevel@tonic-gate address += thislen; 8160Sstevel@tonic-gate size -= thislen; 8170Sstevel@tonic-gate } 8180Sstevel@tonic-gate return (mdsp_new); 8190Sstevel@tonic-gate } 8200Sstevel@tonic-gate 8210Sstevel@tonic-gate static void 8220Sstevel@tonic-gate free_delspans(struct memdelspan *mdsp) 8230Sstevel@tonic-gate { 8240Sstevel@tonic-gate struct memdelspan *amdsp; 8250Sstevel@tonic-gate 8260Sstevel@tonic-gate while ((amdsp = mdsp) != NULL) { 8270Sstevel@tonic-gate mdsp = amdsp->mds_next; 8280Sstevel@tonic-gate kmem_free(amdsp, sizeof (struct memdelspan)); 8290Sstevel@tonic-gate } 8300Sstevel@tonic-gate } 8310Sstevel@tonic-gate 8320Sstevel@tonic-gate /* 8330Sstevel@tonic-gate * Concatenate lists. No list ordering is required. 8340Sstevel@tonic-gate */ 8350Sstevel@tonic-gate 8360Sstevel@tonic-gate static void 8370Sstevel@tonic-gate delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp) 8380Sstevel@tonic-gate { 8390Sstevel@tonic-gate while (*mdspp != NULL) 8400Sstevel@tonic-gate mdspp = &(*mdspp)->mds_next; 8410Sstevel@tonic-gate 8420Sstevel@tonic-gate *mdspp = mdsp; 8430Sstevel@tonic-gate } 8440Sstevel@tonic-gate 8450Sstevel@tonic-gate /* 8460Sstevel@tonic-gate * Given a new list of delspans, check there is no overlap with 8470Sstevel@tonic-gate * all existing span activity (add or delete) and then concatenate 8480Sstevel@tonic-gate * the new spans to the given list. 8490Sstevel@tonic-gate * Return 1 for OK, 0 if overlapping. 8500Sstevel@tonic-gate */ 8510Sstevel@tonic-gate static int 8520Sstevel@tonic-gate delspan_insert( 8530Sstevel@tonic-gate struct transit_list *my_tlp, 8540Sstevel@tonic-gate struct memdelspan *mdsp_new) 8550Sstevel@tonic-gate { 8560Sstevel@tonic-gate struct transit_list_head *trh; 8570Sstevel@tonic-gate struct transit_list *tlp; 8580Sstevel@tonic-gate int ret; 8590Sstevel@tonic-gate 8600Sstevel@tonic-gate trh = &transit_list_head; 8610Sstevel@tonic-gate 8620Sstevel@tonic-gate ASSERT(my_tlp != NULL); 8630Sstevel@tonic-gate ASSERT(mdsp_new != NULL); 8640Sstevel@tonic-gate 8650Sstevel@tonic-gate ret = 1; 8660Sstevel@tonic-gate mutex_enter(&trh->trh_lock); 8670Sstevel@tonic-gate /* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */ 8680Sstevel@tonic-gate for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) { 8690Sstevel@tonic-gate struct memdelspan *mdsp; 8700Sstevel@tonic-gate 8710Sstevel@tonic-gate for (mdsp = tlp->trl_spans; mdsp != NULL; 8720Sstevel@tonic-gate mdsp = mdsp->mds_next) { 8730Sstevel@tonic-gate struct memdelspan *nmdsp; 8740Sstevel@tonic-gate 8750Sstevel@tonic-gate for (nmdsp = mdsp_new; nmdsp != NULL; 8760Sstevel@tonic-gate nmdsp = nmdsp->mds_next) { 8770Sstevel@tonic-gate if (overlapping(mdsp->mds_base, mdsp->mds_npgs, 8780Sstevel@tonic-gate nmdsp->mds_base, nmdsp->mds_npgs)) { 8790Sstevel@tonic-gate ret = 0; 8800Sstevel@tonic-gate goto done; 8810Sstevel@tonic-gate } 8820Sstevel@tonic-gate } 8830Sstevel@tonic-gate } 8840Sstevel@tonic-gate } 8850Sstevel@tonic-gate done: 8860Sstevel@tonic-gate if (ret != 0) { 8870Sstevel@tonic-gate if (my_tlp->trl_spans == NULL) 8880Sstevel@tonic-gate transit_list_insert(my_tlp); 8890Sstevel@tonic-gate delspan_concat(&my_tlp->trl_spans, mdsp_new); 8900Sstevel@tonic-gate } 8910Sstevel@tonic-gate mutex_exit(&trh->trh_lock); 8920Sstevel@tonic-gate return (ret); 8930Sstevel@tonic-gate } 8940Sstevel@tonic-gate 8950Sstevel@tonic-gate static void 8960Sstevel@tonic-gate delspan_remove( 8970Sstevel@tonic-gate struct transit_list *my_tlp, 8980Sstevel@tonic-gate pfn_t base, 8990Sstevel@tonic-gate pgcnt_t npgs) 9000Sstevel@tonic-gate { 9010Sstevel@tonic-gate struct transit_list_head *trh; 9020Sstevel@tonic-gate struct memdelspan *mdsp; 9030Sstevel@tonic-gate 9040Sstevel@tonic-gate trh = &transit_list_head; 9050Sstevel@tonic-gate 9060Sstevel@tonic-gate ASSERT(my_tlp != NULL); 9070Sstevel@tonic-gate 9080Sstevel@tonic-gate mutex_enter(&trh->trh_lock); 9090Sstevel@tonic-gate if ((mdsp = my_tlp->trl_spans) != NULL) { 9100Sstevel@tonic-gate if (npgs == 0) { 9110Sstevel@tonic-gate my_tlp->trl_spans = NULL; 9120Sstevel@tonic-gate free_delspans(mdsp); 9130Sstevel@tonic-gate transit_list_remove(my_tlp); 9140Sstevel@tonic-gate } else { 9150Sstevel@tonic-gate struct memdelspan **prv; 9160Sstevel@tonic-gate 9170Sstevel@tonic-gate prv = &my_tlp->trl_spans; 9180Sstevel@tonic-gate while (mdsp != NULL) { 9190Sstevel@tonic-gate pfn_t p_end; 9200Sstevel@tonic-gate 9210Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 9220Sstevel@tonic-gate if (mdsp->mds_base >= base && 9230Sstevel@tonic-gate p_end <= (base + npgs)) { 9240Sstevel@tonic-gate *prv = mdsp->mds_next; 9250Sstevel@tonic-gate mdsp->mds_next = NULL; 9260Sstevel@tonic-gate free_delspans(mdsp); 9270Sstevel@tonic-gate } else { 9280Sstevel@tonic-gate prv = &mdsp->mds_next; 9290Sstevel@tonic-gate } 9300Sstevel@tonic-gate mdsp = *prv; 9310Sstevel@tonic-gate } 9320Sstevel@tonic-gate if (my_tlp->trl_spans == NULL) 9330Sstevel@tonic-gate transit_list_remove(my_tlp); 9340Sstevel@tonic-gate } 9350Sstevel@tonic-gate } 9360Sstevel@tonic-gate mutex_exit(&trh->trh_lock); 9370Sstevel@tonic-gate } 9380Sstevel@tonic-gate 9390Sstevel@tonic-gate /* 9400Sstevel@tonic-gate * Reserve interface for add to stop delete before add finished. 9410Sstevel@tonic-gate * This list is only accessed through the delspan_insert/remove 9420Sstevel@tonic-gate * functions and so is fully protected by the mutex in struct transit_list. 9430Sstevel@tonic-gate */ 9440Sstevel@tonic-gate 9450Sstevel@tonic-gate static struct transit_list reserve_transit; 9460Sstevel@tonic-gate 9470Sstevel@tonic-gate static int 9480Sstevel@tonic-gate delspan_reserve(pfn_t base, pgcnt_t npgs) 9490Sstevel@tonic-gate { 9500Sstevel@tonic-gate struct memdelspan *mdsp; 9510Sstevel@tonic-gate int ret; 9520Sstevel@tonic-gate 9530Sstevel@tonic-gate mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP); 9540Sstevel@tonic-gate mdsp->mds_base = base; 9550Sstevel@tonic-gate mdsp->mds_npgs = npgs; 9560Sstevel@tonic-gate if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) { 9570Sstevel@tonic-gate free_delspans(mdsp); 9580Sstevel@tonic-gate } 9590Sstevel@tonic-gate return (ret); 9600Sstevel@tonic-gate } 9610Sstevel@tonic-gate 9620Sstevel@tonic-gate static void 9630Sstevel@tonic-gate delspan_unreserve(pfn_t base, pgcnt_t npgs) 9640Sstevel@tonic-gate { 9650Sstevel@tonic-gate delspan_remove(&reserve_transit, base, npgs); 9660Sstevel@tonic-gate } 9670Sstevel@tonic-gate 9680Sstevel@tonic-gate /* 9690Sstevel@tonic-gate * Return whether memseg was created by kphysm_add_memory_dynamic(). 9700Sstevel@tonic-gate * If this is the case and startp non zero, return also the start pfn 9710Sstevel@tonic-gate * of the meta data via startp. 9720Sstevel@tonic-gate */ 9730Sstevel@tonic-gate static int 9740Sstevel@tonic-gate memseg_is_dynamic(struct memseg *seg, pfn_t *startp) 9750Sstevel@tonic-gate { 9760Sstevel@tonic-gate pfn_t pt_start; 9770Sstevel@tonic-gate 9780Sstevel@tonic-gate if ((seg->msegflags & MEMSEG_DYNAMIC) == 0) 9790Sstevel@tonic-gate return (0); 9800Sstevel@tonic-gate 9810Sstevel@tonic-gate /* Meta data is required to be at the beginning */ 9820Sstevel@tonic-gate ASSERT(hat_getpfnum(kas.a_hat, (caddr_t)seg->epages) < seg->pages_base); 9830Sstevel@tonic-gate 9840Sstevel@tonic-gate pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages); 9850Sstevel@tonic-gate if (startp != NULL) 9860Sstevel@tonic-gate *startp = pt_start; 9870Sstevel@tonic-gate 9880Sstevel@tonic-gate return (1); 9890Sstevel@tonic-gate } 9900Sstevel@tonic-gate 9910Sstevel@tonic-gate int 9920Sstevel@tonic-gate kphysm_del_span( 9930Sstevel@tonic-gate memhandle_t handle, 9940Sstevel@tonic-gate pfn_t base, 9950Sstevel@tonic-gate pgcnt_t npgs) 9960Sstevel@tonic-gate { 9970Sstevel@tonic-gate struct mem_handle *mhp; 9980Sstevel@tonic-gate struct memseg *seg; 9990Sstevel@tonic-gate struct memdelspan *mdsp; 10000Sstevel@tonic-gate struct memdelspan *mdsp_new; 10010Sstevel@tonic-gate pgcnt_t phys_pages, vm_pages; 10020Sstevel@tonic-gate pfn_t p_end; 10030Sstevel@tonic-gate page_t *pp; 10040Sstevel@tonic-gate int ret; 10050Sstevel@tonic-gate 10060Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 10070Sstevel@tonic-gate if (mhp == NULL) { 10080Sstevel@tonic-gate return (KPHYSM_EHANDLE); 10090Sstevel@tonic-gate } 10100Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT) { 10110Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 10120Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 10130Sstevel@tonic-gate } 10140Sstevel@tonic-gate 10150Sstevel@tonic-gate /* 10160Sstevel@tonic-gate * Intersect the span with the installed memory list (phys_install). 10170Sstevel@tonic-gate */ 10180Sstevel@tonic-gate mdsp_new = span_to_install(base, npgs); 10190Sstevel@tonic-gate if (mdsp_new == NULL) { 10200Sstevel@tonic-gate /* 10210Sstevel@tonic-gate * No physical memory in this range. Is this an 10220Sstevel@tonic-gate * error? If an attempt to start the delete is made 10230Sstevel@tonic-gate * for OK returns from del_span such as this, start will 10240Sstevel@tonic-gate * return an error. 10250Sstevel@tonic-gate * Could return KPHYSM_ENOWORK. 10260Sstevel@tonic-gate */ 10270Sstevel@tonic-gate /* 10280Sstevel@tonic-gate * It is assumed that there are no error returns 10290Sstevel@tonic-gate * from span_to_install() due to kmem_alloc failure. 10300Sstevel@tonic-gate */ 10310Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 10320Sstevel@tonic-gate return (KPHYSM_OK); 10330Sstevel@tonic-gate } 10340Sstevel@tonic-gate /* 10350Sstevel@tonic-gate * Does this span overlap an existing span? 10360Sstevel@tonic-gate */ 10370Sstevel@tonic-gate if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) { 10380Sstevel@tonic-gate /* 10390Sstevel@tonic-gate * Differentiate between already on list for this handle 10400Sstevel@tonic-gate * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY). 10410Sstevel@tonic-gate */ 10420Sstevel@tonic-gate ret = KPHYSM_EBUSY; 10430Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 10440Sstevel@tonic-gate mdsp = mdsp->mds_next) { 10450Sstevel@tonic-gate if (overlapping(mdsp->mds_base, mdsp->mds_npgs, 10460Sstevel@tonic-gate base, npgs)) { 10470Sstevel@tonic-gate ret = KPHYSM_EDUP; 10480Sstevel@tonic-gate break; 10490Sstevel@tonic-gate } 10500Sstevel@tonic-gate } 10510Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 10520Sstevel@tonic-gate free_delspans(mdsp_new); 10530Sstevel@tonic-gate return (ret); 10540Sstevel@tonic-gate } 10550Sstevel@tonic-gate /* 10560Sstevel@tonic-gate * At this point the spans in mdsp_new have been inserted into the 10570Sstevel@tonic-gate * list of spans for this handle and thereby to the global list of 10580Sstevel@tonic-gate * spans being processed. Each of these spans must now be checked 10590Sstevel@tonic-gate * for relocatability. As a side-effect segments in the memseg list 10600Sstevel@tonic-gate * may be split. 10610Sstevel@tonic-gate * 10620Sstevel@tonic-gate * Note that mdsp_new can no longer be used as it is now part of 10630Sstevel@tonic-gate * a larger list. Select elements of this larger list based 10640Sstevel@tonic-gate * on base and npgs. 10650Sstevel@tonic-gate */ 10660Sstevel@tonic-gate restart: 10670Sstevel@tonic-gate phys_pages = 0; 10680Sstevel@tonic-gate vm_pages = 0; 10690Sstevel@tonic-gate ret = KPHYSM_OK; 10700Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 10710Sstevel@tonic-gate mdsp = mdsp->mds_next) { 10720Sstevel@tonic-gate pgcnt_t pages_checked; 10730Sstevel@tonic-gate 10740Sstevel@tonic-gate if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) { 10750Sstevel@tonic-gate continue; 10760Sstevel@tonic-gate } 10770Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 10780Sstevel@tonic-gate /* 10790Sstevel@tonic-gate * The pages_checked count is a hack. All pages should be 10800Sstevel@tonic-gate * checked for relocatability. Those not covered by memsegs 10810Sstevel@tonic-gate * should be tested with arch_kphysm_del_span_ok(). 10820Sstevel@tonic-gate */ 10830Sstevel@tonic-gate pages_checked = 0; 10840Sstevel@tonic-gate for (seg = memsegs; seg; seg = seg->next) { 10850Sstevel@tonic-gate pfn_t mseg_start; 10860Sstevel@tonic-gate 10870Sstevel@tonic-gate if (seg->pages_base >= p_end || 10880Sstevel@tonic-gate seg->pages_end <= mdsp->mds_base) { 10890Sstevel@tonic-gate /* Span and memseg don't overlap. */ 10900Sstevel@tonic-gate continue; 10910Sstevel@tonic-gate } 10920Sstevel@tonic-gate /* Check that segment is suitable for delete. */ 10930Sstevel@tonic-gate if (memseg_is_dynamic(seg, &mseg_start)) { 10940Sstevel@tonic-gate /* 10950Sstevel@tonic-gate * Can only delete whole added segments 10960Sstevel@tonic-gate * for the moment. 10970Sstevel@tonic-gate * Check that this is completely within the 10980Sstevel@tonic-gate * span. 10990Sstevel@tonic-gate */ 11000Sstevel@tonic-gate if (mseg_start < mdsp->mds_base || 11010Sstevel@tonic-gate seg->pages_end > p_end) { 11020Sstevel@tonic-gate ret = KPHYSM_EBUSY; 11030Sstevel@tonic-gate break; 11040Sstevel@tonic-gate } 11050Sstevel@tonic-gate pages_checked += seg->pages_end - mseg_start; 11060Sstevel@tonic-gate } else { 11070Sstevel@tonic-gate /* 11080Sstevel@tonic-gate * Set mseg_start for accounting below. 11090Sstevel@tonic-gate */ 11100Sstevel@tonic-gate mseg_start = seg->pages_base; 11110Sstevel@tonic-gate /* 11120Sstevel@tonic-gate * If this segment is larger than the span, 11130Sstevel@tonic-gate * try to split it. After the split, it 11140Sstevel@tonic-gate * is necessary to restart. 11150Sstevel@tonic-gate */ 11160Sstevel@tonic-gate if (seg->pages_base < mdsp->mds_base || 11170Sstevel@tonic-gate seg->pages_end > p_end) { 11180Sstevel@tonic-gate pfn_t abase; 11190Sstevel@tonic-gate pgcnt_t anpgs; 11200Sstevel@tonic-gate int s_ret; 11210Sstevel@tonic-gate 11220Sstevel@tonic-gate /* Split required. */ 11230Sstevel@tonic-gate if (mdsp->mds_base < seg->pages_base) 11240Sstevel@tonic-gate abase = seg->pages_base; 11250Sstevel@tonic-gate else 11260Sstevel@tonic-gate abase = mdsp->mds_base; 11270Sstevel@tonic-gate if (p_end > seg->pages_end) 11280Sstevel@tonic-gate anpgs = seg->pages_end - abase; 11290Sstevel@tonic-gate else 11300Sstevel@tonic-gate anpgs = p_end - abase; 11310Sstevel@tonic-gate s_ret = kphysm_split_memseg(abase, 11320Sstevel@tonic-gate anpgs); 11330Sstevel@tonic-gate if (s_ret == 0) { 11340Sstevel@tonic-gate /* Split failed. */ 11350Sstevel@tonic-gate ret = KPHYSM_ERESOURCE; 11360Sstevel@tonic-gate break; 11370Sstevel@tonic-gate } 11380Sstevel@tonic-gate goto restart; 11390Sstevel@tonic-gate } 11400Sstevel@tonic-gate pages_checked += 11410Sstevel@tonic-gate seg->pages_end - seg->pages_base; 11420Sstevel@tonic-gate } 11430Sstevel@tonic-gate /* 11440Sstevel@tonic-gate * The memseg is wholly within the delete span. 11450Sstevel@tonic-gate * The individual pages can now be checked. 11460Sstevel@tonic-gate */ 11470Sstevel@tonic-gate /* Cage test. */ 11480Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) { 11490Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 11500Sstevel@tonic-gate ret = KPHYSM_ENONRELOC; 11510Sstevel@tonic-gate break; 11520Sstevel@tonic-gate } 11530Sstevel@tonic-gate } 11540Sstevel@tonic-gate if (ret != KPHYSM_OK) { 11550Sstevel@tonic-gate break; 11560Sstevel@tonic-gate } 11570Sstevel@tonic-gate phys_pages += (seg->pages_end - mseg_start); 11580Sstevel@tonic-gate vm_pages += MSEG_NPAGES(seg); 11590Sstevel@tonic-gate } 11600Sstevel@tonic-gate if (ret != KPHYSM_OK) 11610Sstevel@tonic-gate break; 11620Sstevel@tonic-gate if (pages_checked != mdsp->mds_npgs) { 11630Sstevel@tonic-gate ret = KPHYSM_ENONRELOC; 11640Sstevel@tonic-gate break; 11650Sstevel@tonic-gate } 11660Sstevel@tonic-gate } 11670Sstevel@tonic-gate 11680Sstevel@tonic-gate if (ret == KPHYSM_OK) { 11690Sstevel@tonic-gate mhp->mh_phys_pages += phys_pages; 11700Sstevel@tonic-gate mhp->mh_vm_pages += vm_pages; 11710Sstevel@tonic-gate } else { 11720Sstevel@tonic-gate /* 11730Sstevel@tonic-gate * Keep holding the mh_mutex to prevent it going away. 11740Sstevel@tonic-gate */ 11750Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, base, npgs); 11760Sstevel@tonic-gate } 11770Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 11780Sstevel@tonic-gate return (ret); 11790Sstevel@tonic-gate } 11800Sstevel@tonic-gate 11810Sstevel@tonic-gate int 11820Sstevel@tonic-gate kphysm_del_span_query( 11830Sstevel@tonic-gate pfn_t base, 11840Sstevel@tonic-gate pgcnt_t npgs, 11850Sstevel@tonic-gate memquery_t *mqp) 11860Sstevel@tonic-gate { 11870Sstevel@tonic-gate struct memdelspan *mdsp; 11880Sstevel@tonic-gate struct memdelspan *mdsp_new; 11890Sstevel@tonic-gate int done_first_nonreloc; 11900Sstevel@tonic-gate 11910Sstevel@tonic-gate mqp->phys_pages = 0; 11920Sstevel@tonic-gate mqp->managed = 0; 11930Sstevel@tonic-gate mqp->nonrelocatable = 0; 11940Sstevel@tonic-gate mqp->first_nonrelocatable = 0; 11950Sstevel@tonic-gate mqp->last_nonrelocatable = 0; 11960Sstevel@tonic-gate 11970Sstevel@tonic-gate mdsp_new = span_to_install(base, npgs); 11980Sstevel@tonic-gate /* 11990Sstevel@tonic-gate * It is OK to proceed here if mdsp_new == NULL. 12000Sstevel@tonic-gate */ 12010Sstevel@tonic-gate done_first_nonreloc = 0; 12020Sstevel@tonic-gate for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) { 12030Sstevel@tonic-gate pfn_t sbase; 12040Sstevel@tonic-gate pgcnt_t snpgs; 12050Sstevel@tonic-gate 12060Sstevel@tonic-gate mqp->phys_pages += mdsp->mds_npgs; 12070Sstevel@tonic-gate sbase = mdsp->mds_base; 12080Sstevel@tonic-gate snpgs = mdsp->mds_npgs; 12090Sstevel@tonic-gate while (snpgs != 0) { 12100Sstevel@tonic-gate struct memseg *lseg, *seg; 12110Sstevel@tonic-gate pfn_t p_end; 12120Sstevel@tonic-gate page_t *pp; 12130Sstevel@tonic-gate pfn_t mseg_start; 12140Sstevel@tonic-gate 12150Sstevel@tonic-gate p_end = sbase + snpgs; 12160Sstevel@tonic-gate /* 12170Sstevel@tonic-gate * Find the lowest addressed memseg that starts 12180Sstevel@tonic-gate * after sbase and account for it. 12190Sstevel@tonic-gate * This is to catch dynamic memsegs whose start 12200Sstevel@tonic-gate * is hidden. 12210Sstevel@tonic-gate */ 12220Sstevel@tonic-gate seg = NULL; 12230Sstevel@tonic-gate for (lseg = memsegs; lseg != NULL; lseg = lseg->next) { 12240Sstevel@tonic-gate if ((lseg->pages_base >= sbase) || 12250Sstevel@tonic-gate (lseg->pages_base < p_end && 12260Sstevel@tonic-gate lseg->pages_end > sbase)) { 12270Sstevel@tonic-gate if (seg == NULL || 12280Sstevel@tonic-gate seg->pages_base > lseg->pages_base) 12290Sstevel@tonic-gate seg = lseg; 12300Sstevel@tonic-gate } 12310Sstevel@tonic-gate } 12320Sstevel@tonic-gate if (seg != NULL) { 12330Sstevel@tonic-gate if (!memseg_is_dynamic(seg, &mseg_start)) { 12340Sstevel@tonic-gate mseg_start = seg->pages_base; 12350Sstevel@tonic-gate } 12360Sstevel@tonic-gate /* 12370Sstevel@tonic-gate * Now have the full extent of the memseg so 12380Sstevel@tonic-gate * do the range check. 12390Sstevel@tonic-gate */ 12400Sstevel@tonic-gate if (mseg_start >= p_end || 12410Sstevel@tonic-gate seg->pages_end <= sbase) { 12420Sstevel@tonic-gate /* Span does not overlap memseg. */ 12430Sstevel@tonic-gate seg = NULL; 12440Sstevel@tonic-gate } 12450Sstevel@tonic-gate } 12460Sstevel@tonic-gate /* 12470Sstevel@tonic-gate * Account for gap either before the segment if 12480Sstevel@tonic-gate * there is one or to the end of the span. 12490Sstevel@tonic-gate */ 12500Sstevel@tonic-gate if (seg == NULL || mseg_start > sbase) { 12510Sstevel@tonic-gate pfn_t a_end; 12520Sstevel@tonic-gate 12530Sstevel@tonic-gate a_end = (seg == NULL) ? p_end : mseg_start; 12540Sstevel@tonic-gate /* 12550Sstevel@tonic-gate * Check with arch layer for relocatability. 12560Sstevel@tonic-gate */ 12570Sstevel@tonic-gate if (arch_kphysm_del_span_ok(sbase, 12580Sstevel@tonic-gate (a_end - sbase))) { 12590Sstevel@tonic-gate /* 12600Sstevel@tonic-gate * No non-relocatble pages in this 12610Sstevel@tonic-gate * area, avoid the fine-grained 12620Sstevel@tonic-gate * test. 12630Sstevel@tonic-gate */ 12640Sstevel@tonic-gate snpgs -= (a_end - sbase); 12650Sstevel@tonic-gate sbase = a_end; 12660Sstevel@tonic-gate } 12670Sstevel@tonic-gate while (sbase < a_end) { 12680Sstevel@tonic-gate if (!arch_kphysm_del_span_ok(sbase, 12690Sstevel@tonic-gate 1)) { 12700Sstevel@tonic-gate mqp->nonrelocatable++; 12710Sstevel@tonic-gate if (!done_first_nonreloc) { 12720Sstevel@tonic-gate mqp-> 12730Sstevel@tonic-gate first_nonrelocatable 12740Sstevel@tonic-gate = sbase; 12750Sstevel@tonic-gate done_first_nonreloc = 1; 12760Sstevel@tonic-gate } 12770Sstevel@tonic-gate mqp->last_nonrelocatable = 12780Sstevel@tonic-gate sbase; 12790Sstevel@tonic-gate } 12800Sstevel@tonic-gate sbase++; 12810Sstevel@tonic-gate snpgs--; 12820Sstevel@tonic-gate } 12830Sstevel@tonic-gate } 12840Sstevel@tonic-gate if (seg != NULL) { 12850Sstevel@tonic-gate ASSERT(mseg_start <= sbase); 12860Sstevel@tonic-gate if (seg->pages_base != mseg_start && 12870Sstevel@tonic-gate seg->pages_base > sbase) { 12880Sstevel@tonic-gate pgcnt_t skip_pgs; 12890Sstevel@tonic-gate 12900Sstevel@tonic-gate /* 12910Sstevel@tonic-gate * Skip the page_t area of a 12920Sstevel@tonic-gate * dynamic memseg. 12930Sstevel@tonic-gate */ 12940Sstevel@tonic-gate skip_pgs = seg->pages_base - sbase; 12950Sstevel@tonic-gate if (snpgs <= skip_pgs) { 12960Sstevel@tonic-gate sbase += snpgs; 12970Sstevel@tonic-gate snpgs = 0; 12980Sstevel@tonic-gate continue; 12990Sstevel@tonic-gate } 13000Sstevel@tonic-gate snpgs -= skip_pgs; 13010Sstevel@tonic-gate sbase += skip_pgs; 13020Sstevel@tonic-gate } 13030Sstevel@tonic-gate ASSERT(snpgs != 0); 13040Sstevel@tonic-gate ASSERT(seg->pages_base <= sbase); 13050Sstevel@tonic-gate /* 13060Sstevel@tonic-gate * The individual pages can now be checked. 13070Sstevel@tonic-gate */ 13080Sstevel@tonic-gate for (pp = seg->pages + 13090Sstevel@tonic-gate (sbase - seg->pages_base); 13100Sstevel@tonic-gate snpgs != 0 && pp < seg->epages; pp++) { 13110Sstevel@tonic-gate mqp->managed++; 13120Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 13130Sstevel@tonic-gate mqp->nonrelocatable++; 13140Sstevel@tonic-gate if (!done_first_nonreloc) { 13150Sstevel@tonic-gate mqp-> 13160Sstevel@tonic-gate first_nonrelocatable 13170Sstevel@tonic-gate = sbase; 13180Sstevel@tonic-gate done_first_nonreloc = 1; 13190Sstevel@tonic-gate } 13200Sstevel@tonic-gate mqp->last_nonrelocatable = 13210Sstevel@tonic-gate sbase; 13220Sstevel@tonic-gate } 13230Sstevel@tonic-gate sbase++; 13240Sstevel@tonic-gate snpgs--; 13250Sstevel@tonic-gate } 13260Sstevel@tonic-gate } 13270Sstevel@tonic-gate } 13280Sstevel@tonic-gate } 13290Sstevel@tonic-gate 13300Sstevel@tonic-gate free_delspans(mdsp_new); 13310Sstevel@tonic-gate 13320Sstevel@tonic-gate return (KPHYSM_OK); 13330Sstevel@tonic-gate } 13340Sstevel@tonic-gate 13350Sstevel@tonic-gate /* 13360Sstevel@tonic-gate * This release function can be called at any stage as follows: 13370Sstevel@tonic-gate * _gethandle only called 13380Sstevel@tonic-gate * _span(s) only called 13390Sstevel@tonic-gate * _start called but failed 13400Sstevel@tonic-gate * delete thread exited 13410Sstevel@tonic-gate */ 13420Sstevel@tonic-gate int 13430Sstevel@tonic-gate kphysm_del_release(memhandle_t handle) 13440Sstevel@tonic-gate { 13450Sstevel@tonic-gate struct mem_handle *mhp; 13460Sstevel@tonic-gate 13470Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 13480Sstevel@tonic-gate if (mhp == NULL) { 13490Sstevel@tonic-gate return (KPHYSM_EHANDLE); 13500Sstevel@tonic-gate } 13510Sstevel@tonic-gate switch (mhp->mh_state) { 13520Sstevel@tonic-gate case MHND_STARTING: 13530Sstevel@tonic-gate case MHND_RUNNING: 13540Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13550Sstevel@tonic-gate return (KPHYSM_ENOTFINISHED); 13560Sstevel@tonic-gate case MHND_FREE: 13570Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 13580Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13590Sstevel@tonic-gate return (KPHYSM_EHANDLE); 13600Sstevel@tonic-gate case MHND_INIT: 13610Sstevel@tonic-gate break; 13620Sstevel@tonic-gate case MHND_DONE: 13630Sstevel@tonic-gate break; 13640Sstevel@tonic-gate case MHND_RELEASE: 13650Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13660Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 13670Sstevel@tonic-gate default: 13680Sstevel@tonic-gate #ifdef DEBUG 13690Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d", 13700Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 13710Sstevel@tonic-gate #endif /* DEBUG */ 13720Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13730Sstevel@tonic-gate return (KPHYSM_EHANDLE); 13740Sstevel@tonic-gate } 13750Sstevel@tonic-gate /* 13760Sstevel@tonic-gate * Set state so that we can wait if necessary. 13770Sstevel@tonic-gate * Also this means that we have read/write access to all 13780Sstevel@tonic-gate * fields except mh_exthandle and mh_state. 13790Sstevel@tonic-gate */ 13800Sstevel@tonic-gate mhp->mh_state = MHND_RELEASE; 13810Sstevel@tonic-gate /* 13820Sstevel@tonic-gate * The mem_handle cannot be de-allocated by any other operation 13830Sstevel@tonic-gate * now, so no need to hold mh_mutex. 13840Sstevel@tonic-gate */ 13850Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 13860Sstevel@tonic-gate 13870Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, 0, 0); 13880Sstevel@tonic-gate mhp->mh_phys_pages = 0; 13890Sstevel@tonic-gate mhp->mh_vm_pages = 0; 13900Sstevel@tonic-gate mhp->mh_hold_todo = 0; 13910Sstevel@tonic-gate mhp->mh_delete_complete = NULL; 13920Sstevel@tonic-gate mhp->mh_delete_complete_arg = NULL; 13930Sstevel@tonic-gate mhp->mh_cancel = 0; 13940Sstevel@tonic-gate 13950Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 13960Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_RELEASE); 13970Sstevel@tonic-gate mhp->mh_state = MHND_FREE; 13980Sstevel@tonic-gate 13990Sstevel@tonic-gate kphysm_free_mem_handle(mhp); 14000Sstevel@tonic-gate 14010Sstevel@tonic-gate return (KPHYSM_OK); 14020Sstevel@tonic-gate } 14030Sstevel@tonic-gate 14040Sstevel@tonic-gate /* 14050Sstevel@tonic-gate * This cancel function can only be called with the thread running. 14060Sstevel@tonic-gate */ 14070Sstevel@tonic-gate int 14080Sstevel@tonic-gate kphysm_del_cancel(memhandle_t handle) 14090Sstevel@tonic-gate { 14100Sstevel@tonic-gate struct mem_handle *mhp; 14110Sstevel@tonic-gate 14120Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 14130Sstevel@tonic-gate if (mhp == NULL) { 14140Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14150Sstevel@tonic-gate } 14160Sstevel@tonic-gate if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) { 14170Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14180Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING); 14190Sstevel@tonic-gate } 14200Sstevel@tonic-gate /* 14210Sstevel@tonic-gate * Set the cancel flag and wake the delete thread up. 14220Sstevel@tonic-gate * The thread may be waiting on I/O, so the effect of the cancel 14230Sstevel@tonic-gate * may be delayed. 14240Sstevel@tonic-gate */ 14250Sstevel@tonic-gate if (mhp->mh_cancel == 0) { 14260Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ECANCELLED; 14270Sstevel@tonic-gate cv_signal(&mhp->mh_cv); 14280Sstevel@tonic-gate } 14290Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14300Sstevel@tonic-gate return (KPHYSM_OK); 14310Sstevel@tonic-gate } 14320Sstevel@tonic-gate 14330Sstevel@tonic-gate int 14340Sstevel@tonic-gate kphysm_del_status( 14350Sstevel@tonic-gate memhandle_t handle, 14360Sstevel@tonic-gate memdelstat_t *mdstp) 14370Sstevel@tonic-gate { 14380Sstevel@tonic-gate struct mem_handle *mhp; 14390Sstevel@tonic-gate 14400Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 14410Sstevel@tonic-gate if (mhp == NULL) { 14420Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14430Sstevel@tonic-gate } 14440Sstevel@tonic-gate /* 14450Sstevel@tonic-gate * Calling kphysm_del_status() is allowed before the delete 14460Sstevel@tonic-gate * is started to allow for status display. 14470Sstevel@tonic-gate */ 14480Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING && 14490Sstevel@tonic-gate mhp->mh_state != MHND_RUNNING) { 14500Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14510Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING); 14520Sstevel@tonic-gate } 14530Sstevel@tonic-gate mdstp->phys_pages = mhp->mh_phys_pages; 14540Sstevel@tonic-gate mdstp->managed = mhp->mh_vm_pages; 14550Sstevel@tonic-gate mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo; 14560Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14570Sstevel@tonic-gate return (KPHYSM_OK); 14580Sstevel@tonic-gate } 14590Sstevel@tonic-gate 14600Sstevel@tonic-gate static int mem_delete_additional_pages = 100; 14610Sstevel@tonic-gate 14620Sstevel@tonic-gate static int 14630Sstevel@tonic-gate can_remove_pgs(pgcnt_t npgs) 14640Sstevel@tonic-gate { 14650Sstevel@tonic-gate /* 14660Sstevel@tonic-gate * If all pageable pages were paged out, freemem would 14670Sstevel@tonic-gate * equal availrmem. There is a minimum requirement for 14680Sstevel@tonic-gate * availrmem. 14690Sstevel@tonic-gate */ 14700Sstevel@tonic-gate if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages)) 14710Sstevel@tonic-gate < npgs) 14720Sstevel@tonic-gate return (0); 14730Sstevel@tonic-gate /* TODO: check swap space, etc. */ 14740Sstevel@tonic-gate return (1); 14750Sstevel@tonic-gate } 14760Sstevel@tonic-gate 14770Sstevel@tonic-gate static int 14780Sstevel@tonic-gate get_availrmem(pgcnt_t npgs) 14790Sstevel@tonic-gate { 14800Sstevel@tonic-gate int ret; 14810Sstevel@tonic-gate 14820Sstevel@tonic-gate mutex_enter(&freemem_lock); 14830Sstevel@tonic-gate ret = can_remove_pgs(npgs); 14840Sstevel@tonic-gate if (ret != 0) 14850Sstevel@tonic-gate availrmem -= npgs; 14860Sstevel@tonic-gate mutex_exit(&freemem_lock); 14870Sstevel@tonic-gate return (ret); 14880Sstevel@tonic-gate } 14890Sstevel@tonic-gate 14900Sstevel@tonic-gate static void 14910Sstevel@tonic-gate put_availrmem(pgcnt_t npgs) 14920Sstevel@tonic-gate { 14930Sstevel@tonic-gate mutex_enter(&freemem_lock); 14940Sstevel@tonic-gate availrmem += npgs; 14950Sstevel@tonic-gate mutex_exit(&freemem_lock); 14960Sstevel@tonic-gate } 14970Sstevel@tonic-gate 14980Sstevel@tonic-gate #define FREEMEM_INCR 100 14990Sstevel@tonic-gate static pgcnt_t freemem_incr = FREEMEM_INCR; 15000Sstevel@tonic-gate #define DEL_FREE_WAIT_FRAC 4 15010Sstevel@tonic-gate #define DEL_FREE_WAIT_TICKS ((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC) 15020Sstevel@tonic-gate 15030Sstevel@tonic-gate #define DEL_BUSY_WAIT_FRAC 20 15040Sstevel@tonic-gate #define DEL_BUSY_WAIT_TICKS ((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC) 15050Sstevel@tonic-gate 15060Sstevel@tonic-gate static void kphysm_del_cleanup(struct mem_handle *); 15070Sstevel@tonic-gate 15080Sstevel@tonic-gate static void page_delete_collect(page_t *, struct mem_handle *); 15090Sstevel@tonic-gate 15100Sstevel@tonic-gate static pgcnt_t 15110Sstevel@tonic-gate delthr_get_freemem(struct mem_handle *mhp) 15120Sstevel@tonic-gate { 15130Sstevel@tonic-gate pgcnt_t free_get; 15140Sstevel@tonic-gate int ret; 15150Sstevel@tonic-gate 15160Sstevel@tonic-gate ASSERT(MUTEX_HELD(&mhp->mh_mutex)); 15170Sstevel@tonic-gate 15180Sstevel@tonic-gate MDSTAT_INCR(mhp, need_free); 15190Sstevel@tonic-gate /* 15200Sstevel@tonic-gate * Get up to freemem_incr pages. 15210Sstevel@tonic-gate */ 15220Sstevel@tonic-gate free_get = freemem_incr; 15230Sstevel@tonic-gate if (free_get > mhp->mh_hold_todo) 15240Sstevel@tonic-gate free_get = mhp->mh_hold_todo; 15250Sstevel@tonic-gate /* 15260Sstevel@tonic-gate * Take free_get pages away from freemem, 15270Sstevel@tonic-gate * waiting if necessary. 15280Sstevel@tonic-gate */ 15290Sstevel@tonic-gate 15300Sstevel@tonic-gate while (!mhp->mh_cancel) { 15310Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15320Sstevel@tonic-gate MDSTAT_INCR(mhp, free_loop); 15330Sstevel@tonic-gate /* 15340Sstevel@tonic-gate * Duplicate test from page_create_throttle() 15350Sstevel@tonic-gate * but don't override with !PG_WAIT. 15360Sstevel@tonic-gate */ 15370Sstevel@tonic-gate if (freemem < (free_get + throttlefree)) { 15380Sstevel@tonic-gate MDSTAT_INCR(mhp, free_low); 15390Sstevel@tonic-gate ret = 0; 15400Sstevel@tonic-gate } else { 15410Sstevel@tonic-gate ret = page_create_wait(free_get, 0); 15420Sstevel@tonic-gate if (ret == 0) { 15430Sstevel@tonic-gate /* EMPTY */ 15440Sstevel@tonic-gate MDSTAT_INCR(mhp, free_failed); 15450Sstevel@tonic-gate } 15460Sstevel@tonic-gate } 15470Sstevel@tonic-gate if (ret != 0) { 15480Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 15490Sstevel@tonic-gate return (free_get); 15500Sstevel@tonic-gate } 15510Sstevel@tonic-gate 15520Sstevel@tonic-gate /* 15530Sstevel@tonic-gate * Put pressure on pageout. 15540Sstevel@tonic-gate */ 15550Sstevel@tonic-gate page_needfree(free_get); 15560Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 15570Sstevel@tonic-gate 15580Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 15590Sstevel@tonic-gate (void) cv_timedwait(&mhp->mh_cv, &mhp->mh_mutex, 15600Sstevel@tonic-gate (lbolt + DEL_FREE_WAIT_TICKS)); 15610Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15620Sstevel@tonic-gate page_needfree(-(spgcnt_t)free_get); 15630Sstevel@tonic-gate 15640Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 15650Sstevel@tonic-gate } 15660Sstevel@tonic-gate return (0); 15670Sstevel@tonic-gate } 15680Sstevel@tonic-gate 15690Sstevel@tonic-gate #define DR_AIO_CLEANUP_DELAY 25000 /* 0.025secs, in usec */ 15700Sstevel@tonic-gate #define DR_AIO_CLEANUP_MAXLOOPS_NODELAY 100 15710Sstevel@tonic-gate /* 15720Sstevel@tonic-gate * This function is run as a helper thread for delete_memory_thread. 15730Sstevel@tonic-gate * It is needed in order to force kaio cleanup, so that pages used in kaio 15740Sstevel@tonic-gate * will be unlocked and subsequently relocated by delete_memory_thread. 15750Sstevel@tonic-gate * The address of the delete_memory_threads's mem_handle is passed in to 15760Sstevel@tonic-gate * this thread function, and is used to set the mh_aio_cleanup_done member 15770Sstevel@tonic-gate * prior to calling thread_exit(). 15780Sstevel@tonic-gate */ 15790Sstevel@tonic-gate static void 15800Sstevel@tonic-gate dr_aio_cleanup_thread(caddr_t amhp) 15810Sstevel@tonic-gate { 15820Sstevel@tonic-gate proc_t *procp; 15830Sstevel@tonic-gate int (*aio_cleanup_dr_delete_memory)(proc_t *); 15840Sstevel@tonic-gate int cleaned; 15850Sstevel@tonic-gate int n = 0; 15860Sstevel@tonic-gate struct mem_handle *mhp; 15870Sstevel@tonic-gate volatile uint_t *pcancel; 15880Sstevel@tonic-gate 15890Sstevel@tonic-gate mhp = (struct mem_handle *)amhp; 15900Sstevel@tonic-gate ASSERT(mhp != NULL); 15910Sstevel@tonic-gate pcancel = &mhp->mh_dr_aio_cleanup_cancel; 15920Sstevel@tonic-gate if (modload("sys", "kaio") == -1) { 15930Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 15940Sstevel@tonic-gate cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio"); 15950Sstevel@tonic-gate thread_exit(); 15960Sstevel@tonic-gate } 15970Sstevel@tonic-gate aio_cleanup_dr_delete_memory = (int (*)(proc_t *)) 15980Sstevel@tonic-gate modgetsymvalue("aio_cleanup_dr_delete_memory", 0); 15990Sstevel@tonic-gate if (aio_cleanup_dr_delete_memory == NULL) { 16000Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 16010Sstevel@tonic-gate cmn_err(CE_WARN, 16020Sstevel@tonic-gate "aio_cleanup_dr_delete_memory not found in kaio"); 16030Sstevel@tonic-gate thread_exit(); 16040Sstevel@tonic-gate } 16050Sstevel@tonic-gate do { 16060Sstevel@tonic-gate cleaned = 0; 16070Sstevel@tonic-gate mutex_enter(&pidlock); 16080Sstevel@tonic-gate for (procp = practive; (*pcancel == 0) && (procp != NULL); 16090Sstevel@tonic-gate procp = procp->p_next) { 16100Sstevel@tonic-gate mutex_enter(&procp->p_lock); 16110Sstevel@tonic-gate if (procp->p_aio != NULL) { 16120Sstevel@tonic-gate /* cleanup proc's outstanding kaio */ 16130Sstevel@tonic-gate cleaned += 16140Sstevel@tonic-gate (*aio_cleanup_dr_delete_memory)(procp); 16150Sstevel@tonic-gate } 16160Sstevel@tonic-gate mutex_exit(&procp->p_lock); 16170Sstevel@tonic-gate } 16180Sstevel@tonic-gate mutex_exit(&pidlock); 16190Sstevel@tonic-gate if ((*pcancel == 0) && 16200Sstevel@tonic-gate (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) { 16210Sstevel@tonic-gate /* delay a bit before retrying all procs again */ 16220Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY)); 16230Sstevel@tonic-gate n = 0; 16240Sstevel@tonic-gate } 16250Sstevel@tonic-gate } while (*pcancel == 0); 16260Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 16270Sstevel@tonic-gate thread_exit(); 16280Sstevel@tonic-gate } 16290Sstevel@tonic-gate 16300Sstevel@tonic-gate static void 16310Sstevel@tonic-gate delete_memory_thread(caddr_t amhp) 16320Sstevel@tonic-gate { 16330Sstevel@tonic-gate struct mem_handle *mhp; 16340Sstevel@tonic-gate struct memdelspan *mdsp; 16350Sstevel@tonic-gate callb_cpr_t cprinfo; 16360Sstevel@tonic-gate page_t *pp_targ; 16370Sstevel@tonic-gate spgcnt_t freemem_left; 16380Sstevel@tonic-gate void (*del_complete_funcp)(void *, int error); 16390Sstevel@tonic-gate void *del_complete_arg; 16400Sstevel@tonic-gate int comp_code; 16410Sstevel@tonic-gate int ret; 16420Sstevel@tonic-gate int first_scan; 16430Sstevel@tonic-gate uint_t szc; 16440Sstevel@tonic-gate #ifdef MEM_DEL_STATS 16450Sstevel@tonic-gate uint64_t start_total, ntick_total; 16460Sstevel@tonic-gate uint64_t start_pgrp, ntick_pgrp; 16470Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 16480Sstevel@tonic-gate 16490Sstevel@tonic-gate mhp = (struct mem_handle *)amhp; 16500Sstevel@tonic-gate 16510Sstevel@tonic-gate #ifdef MEM_DEL_STATS 16520Sstevel@tonic-gate start_total = ddi_get_lbolt(); 16530Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 16540Sstevel@tonic-gate 16550Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex, 16560Sstevel@tonic-gate callb_generic_cpr, "memdel"); 16570Sstevel@tonic-gate 16580Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16590Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_STARTING); 16600Sstevel@tonic-gate 16610Sstevel@tonic-gate mhp->mh_state = MHND_RUNNING; 16620Sstevel@tonic-gate mhp->mh_thread_id = curthread; 16630Sstevel@tonic-gate 16640Sstevel@tonic-gate mhp->mh_hold_todo = mhp->mh_vm_pages; 16650Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 16660Sstevel@tonic-gate 16670Sstevel@tonic-gate /* Allocate the remap pages now, if necessary. */ 16680Sstevel@tonic-gate memseg_remap_init(); 16690Sstevel@tonic-gate 16700Sstevel@tonic-gate /* 16710Sstevel@tonic-gate * Subtract from availrmem now if possible as availrmem 16720Sstevel@tonic-gate * may not be available by the end of the delete. 16730Sstevel@tonic-gate */ 16740Sstevel@tonic-gate if (!get_availrmem(mhp->mh_vm_pages)) { 16750Sstevel@tonic-gate comp_code = KPHYSM_ENOTVIABLE; 16760Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16770Sstevel@tonic-gate goto early_exit; 16780Sstevel@tonic-gate } 16790Sstevel@tonic-gate 16800Sstevel@tonic-gate ret = kphysm_setup_pre_del(mhp->mh_vm_pages); 16810Sstevel@tonic-gate 16820Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16830Sstevel@tonic-gate 16840Sstevel@tonic-gate if (ret != 0) { 16850Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_EREFUSED; 16860Sstevel@tonic-gate goto refused; 16870Sstevel@tonic-gate } 16880Sstevel@tonic-gate 16890Sstevel@tonic-gate transit_list_collect(mhp, 1); 16900Sstevel@tonic-gate 16910Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 16920Sstevel@tonic-gate mdsp = mdsp->mds_next) { 16930Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap == NULL); 16940Sstevel@tonic-gate mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP); 16950Sstevel@tonic-gate mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp), 16960Sstevel@tonic-gate KM_SLEEP); 16970Sstevel@tonic-gate } 16980Sstevel@tonic-gate 16990Sstevel@tonic-gate first_scan = 1; 17000Sstevel@tonic-gate freemem_left = 0; 17010Sstevel@tonic-gate /* 17020Sstevel@tonic-gate * Start dr_aio_cleanup_thread, which periodically iterates 17030Sstevel@tonic-gate * through the process list and invokes aio cleanup. This 17040Sstevel@tonic-gate * is needed in order to avoid a deadly embrace between the 17050Sstevel@tonic-gate * delete_memory_thread (waiting on writer lock for page, with the 17060Sstevel@tonic-gate * exclusive-wanted bit set), kaio read request threads (waiting for a 17070Sstevel@tonic-gate * reader lock on the same page that is wanted by the 17080Sstevel@tonic-gate * delete_memory_thread), and threads waiting for kaio completion 17090Sstevel@tonic-gate * (blocked on spt_amp->lock). 17100Sstevel@tonic-gate */ 17110Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 0; 17120Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 0; 17130Sstevel@tonic-gate (void) thread_create(NULL, 0, dr_aio_cleanup_thread, 17140Sstevel@tonic-gate (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1); 17150Sstevel@tonic-gate while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) { 17160Sstevel@tonic-gate pgcnt_t collected; 17170Sstevel@tonic-gate 17180Sstevel@tonic-gate MDSTAT_INCR(mhp, nloop); 17190Sstevel@tonic-gate collected = 0; 17200Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) && 17210Sstevel@tonic-gate (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) { 17220Sstevel@tonic-gate pfn_t pfn, p_end; 17230Sstevel@tonic-gate 17240Sstevel@tonic-gate if (first_scan) { 17250Sstevel@tonic-gate mem_node_pre_del_slice(mdsp->mds_base, 17260Sstevel@tonic-gate mdsp->mds_base + mdsp->mds_npgs - 1); 17270Sstevel@tonic-gate } 17280Sstevel@tonic-gate 17290Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 17300Sstevel@tonic-gate for (pfn = mdsp->mds_base; (pfn < p_end) && 17310Sstevel@tonic-gate (mhp->mh_cancel == 0); pfn++) { 17320Sstevel@tonic-gate page_t *pp, *tpp, *tpp_targ; 17330Sstevel@tonic-gate pgcnt_t bit; 17340Sstevel@tonic-gate struct vnode *vp; 17350Sstevel@tonic-gate u_offset_t offset; 17360Sstevel@tonic-gate int mod, result; 17370Sstevel@tonic-gate spgcnt_t pgcnt; 17380Sstevel@tonic-gate 17390Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 17400Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] & 17410Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) { 17420Sstevel@tonic-gate MDSTAT_INCR(mhp, already_done); 17430Sstevel@tonic-gate continue; 17440Sstevel@tonic-gate } 17450Sstevel@tonic-gate if (freemem_left == 0) { 17460Sstevel@tonic-gate freemem_left += delthr_get_freemem(mhp); 17470Sstevel@tonic-gate if (freemem_left == 0) 17480Sstevel@tonic-gate break; 17490Sstevel@tonic-gate } 17500Sstevel@tonic-gate 17510Sstevel@tonic-gate /* 17520Sstevel@tonic-gate * Release mh_mutex - some of this 17530Sstevel@tonic-gate * stuff takes some time (eg PUTPAGE). 17540Sstevel@tonic-gate */ 17550Sstevel@tonic-gate 17560Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 17570Sstevel@tonic-gate MDSTAT_INCR(mhp, ncheck); 17580Sstevel@tonic-gate 17590Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 17600Sstevel@tonic-gate if (pp == NULL) { 17610Sstevel@tonic-gate /* 17620Sstevel@tonic-gate * Not covered by a page_t - will 17630Sstevel@tonic-gate * be dealt with elsewhere. 17640Sstevel@tonic-gate */ 17650Sstevel@tonic-gate MDSTAT_INCR(mhp, nopaget); 17660Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17670Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 17680Sstevel@tonic-gate (1 << (bit % NBPBMW)); 17690Sstevel@tonic-gate continue; 17700Sstevel@tonic-gate } 17710Sstevel@tonic-gate 17720Sstevel@tonic-gate if (!page_try_reclaim_lock(pp, SE_EXCL, 1773*917Selowe SE_EXCL_WANTED | SE_RETIRED)) { 1774*917Selowe /* 1775*917Selowe * Page in use elsewhere. Skip it. 1776*917Selowe */ 1777*917Selowe MDSTAT_INCR(mhp, lockfail); 1778*917Selowe mutex_enter(&mhp->mh_mutex); 1779*917Selowe continue; 17800Sstevel@tonic-gate } 17810Sstevel@tonic-gate /* 17820Sstevel@tonic-gate * See if the cage expanded into the delete. 17830Sstevel@tonic-gate * This can happen as we have to allow the 17840Sstevel@tonic-gate * cage to expand. 17850Sstevel@tonic-gate */ 17860Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 1787*917Selowe page_unlock(pp); 17880Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17890Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ENONRELOC; 17900Sstevel@tonic-gate break; 17910Sstevel@tonic-gate } 1792*917Selowe if (PP_RETIRED(pp)) { 17930Sstevel@tonic-gate /* 17940Sstevel@tonic-gate * Page has been retired and is 17950Sstevel@tonic-gate * not part of the cage so we 17960Sstevel@tonic-gate * can now do the accounting for 17970Sstevel@tonic-gate * it. 17980Sstevel@tonic-gate */ 17990Sstevel@tonic-gate MDSTAT_INCR(mhp, retired); 18000Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18010Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] 18020Sstevel@tonic-gate |= (1 << (bit % NBPBMW)); 18030Sstevel@tonic-gate mdsp->mds_bitmap_retired[bit / 18040Sstevel@tonic-gate NBPBMW] |= 18050Sstevel@tonic-gate (1 << (bit % NBPBMW)); 18060Sstevel@tonic-gate mhp->mh_hold_todo--; 18070Sstevel@tonic-gate continue; 18080Sstevel@tonic-gate } 18090Sstevel@tonic-gate ASSERT(freemem_left != 0); 18100Sstevel@tonic-gate if (PP_ISFREE(pp)) { 18110Sstevel@tonic-gate /* 18120Sstevel@tonic-gate * Like page_reclaim() only 'freemem' 18130Sstevel@tonic-gate * processing is already done. 18140Sstevel@tonic-gate */ 18150Sstevel@tonic-gate MDSTAT_INCR(mhp, nfree); 18160Sstevel@tonic-gate free_page_collect: 18170Sstevel@tonic-gate if (PP_ISAGED(pp)) { 18180Sstevel@tonic-gate page_list_sub(pp, 18190Sstevel@tonic-gate PG_FREE_LIST); 18200Sstevel@tonic-gate } else { 18210Sstevel@tonic-gate page_list_sub(pp, 18220Sstevel@tonic-gate PG_CACHE_LIST); 18230Sstevel@tonic-gate } 18240Sstevel@tonic-gate PP_CLRFREE(pp); 18250Sstevel@tonic-gate PP_CLRAGED(pp); 18260Sstevel@tonic-gate collected++; 18270Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18280Sstevel@tonic-gate page_delete_collect(pp, mhp); 18290Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 18300Sstevel@tonic-gate (1 << (bit % NBPBMW)); 18310Sstevel@tonic-gate freemem_left--; 18320Sstevel@tonic-gate continue; 18330Sstevel@tonic-gate } 18340Sstevel@tonic-gate ASSERT(pp->p_vnode != NULL); 18350Sstevel@tonic-gate if (first_scan) { 18360Sstevel@tonic-gate MDSTAT_INCR(mhp, first_notfree); 18370Sstevel@tonic-gate page_unlock(pp); 18380Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18390Sstevel@tonic-gate continue; 18400Sstevel@tonic-gate } 18410Sstevel@tonic-gate /* 18420Sstevel@tonic-gate * Keep stats on pages encountered that 1843*917Selowe * are marked for retirement. 18440Sstevel@tonic-gate */ 1845*917Selowe if (PP_TOXIC(pp)) { 18460Sstevel@tonic-gate MDSTAT_INCR(mhp, toxic); 1847*917Selowe } else if (PP_PR_REQ(pp)) { 18480Sstevel@tonic-gate MDSTAT_INCR(mhp, failing); 18490Sstevel@tonic-gate } 18500Sstevel@tonic-gate /* 18510Sstevel@tonic-gate * In certain cases below, special exceptions 18520Sstevel@tonic-gate * are made for pages that are toxic. This 18530Sstevel@tonic-gate * is because the current meaning of toxic 18540Sstevel@tonic-gate * is that an uncorrectable error has been 18550Sstevel@tonic-gate * previously associated with the page. 18560Sstevel@tonic-gate */ 18570Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 1858*917Selowe if (!PP_TOXIC(pp)) { 18590Sstevel@tonic-gate /* 18600Sstevel@tonic-gate * Must relocate locked in 18610Sstevel@tonic-gate * memory pages. 18620Sstevel@tonic-gate */ 18630Sstevel@tonic-gate #ifdef MEM_DEL_STATS 18640Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 18650Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 18660Sstevel@tonic-gate /* 18670Sstevel@tonic-gate * Lock all constituent pages 18680Sstevel@tonic-gate * of a large page to ensure 18690Sstevel@tonic-gate * that p_szc won't change. 18700Sstevel@tonic-gate */ 18710Sstevel@tonic-gate if (!group_page_trylock(pp, 18720Sstevel@tonic-gate SE_EXCL)) { 18730Sstevel@tonic-gate MDSTAT_INCR(mhp, 18740Sstevel@tonic-gate gptllckfail); 18750Sstevel@tonic-gate page_unlock(pp); 18760Sstevel@tonic-gate mutex_enter( 18770Sstevel@tonic-gate &mhp->mh_mutex); 18780Sstevel@tonic-gate continue; 18790Sstevel@tonic-gate } 18800Sstevel@tonic-gate MDSTAT_INCR(mhp, npplocked); 18810Sstevel@tonic-gate pp_targ = 18820Sstevel@tonic-gate page_get_replacement_page( 18830Sstevel@tonic-gate pp, NULL, 0); 18840Sstevel@tonic-gate if (pp_targ != NULL) { 18850Sstevel@tonic-gate #ifdef MEM_DEL_STATS 18860Sstevel@tonic-gate ntick_pgrp = 18870Sstevel@tonic-gate (uint64_t) 18880Sstevel@tonic-gate ddi_get_lbolt() - 18890Sstevel@tonic-gate start_pgrp; 18900Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 18910Sstevel@tonic-gate MDSTAT_PGRP(mhp, 18920Sstevel@tonic-gate ntick_pgrp); 18930Sstevel@tonic-gate MDSTAT_INCR(mhp, 18940Sstevel@tonic-gate nlockreloc); 18950Sstevel@tonic-gate goto reloc; 18960Sstevel@tonic-gate } 18970Sstevel@tonic-gate group_page_unlock(pp); 18980Sstevel@tonic-gate page_unlock(pp); 18990Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19000Sstevel@tonic-gate ntick_pgrp = 19010Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() - 19020Sstevel@tonic-gate start_pgrp; 19030Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19040Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19050Sstevel@tonic-gate MDSTAT_INCR(mhp, nnorepl); 19060Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19070Sstevel@tonic-gate continue; 19080Sstevel@tonic-gate } else { 19090Sstevel@tonic-gate /* 19100Sstevel@tonic-gate * Cannot do anything about 19110Sstevel@tonic-gate * this page because it is 19120Sstevel@tonic-gate * toxic. 19130Sstevel@tonic-gate */ 19140Sstevel@tonic-gate MDSTAT_INCR(mhp, npplkdtoxic); 19150Sstevel@tonic-gate page_unlock(pp); 19160Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19170Sstevel@tonic-gate continue; 19180Sstevel@tonic-gate } 19190Sstevel@tonic-gate } 19200Sstevel@tonic-gate /* 19210Sstevel@tonic-gate * Unload the mappings and check if mod bit 19220Sstevel@tonic-gate * is set. 19230Sstevel@tonic-gate */ 19240Sstevel@tonic-gate ASSERT(pp->p_vnode != &kvp); 19250Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 19260Sstevel@tonic-gate mod = hat_ismod(pp); 19270Sstevel@tonic-gate 19280Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19290Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 19300Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 1931*917Selowe if (mod && !PP_TOXIC(pp)) { 19320Sstevel@tonic-gate /* 19330Sstevel@tonic-gate * Lock all constituent pages 19340Sstevel@tonic-gate * of a large page to ensure 19350Sstevel@tonic-gate * that p_szc won't change. 19360Sstevel@tonic-gate */ 19370Sstevel@tonic-gate if (!group_page_trylock(pp, SE_EXCL)) { 19380Sstevel@tonic-gate MDSTAT_INCR(mhp, gptlmodfail); 19390Sstevel@tonic-gate page_unlock(pp); 19400Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19410Sstevel@tonic-gate continue; 19420Sstevel@tonic-gate } 19430Sstevel@tonic-gate pp_targ = page_get_replacement_page(pp, 19440Sstevel@tonic-gate NULL, 0); 19450Sstevel@tonic-gate if (pp_targ != NULL) { 19460Sstevel@tonic-gate MDSTAT_INCR(mhp, nmodreloc); 19470Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19480Sstevel@tonic-gate ntick_pgrp = 19490Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() - 19500Sstevel@tonic-gate start_pgrp; 19510Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19520Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19530Sstevel@tonic-gate goto reloc; 19540Sstevel@tonic-gate } 19550Sstevel@tonic-gate group_page_unlock(pp); 19560Sstevel@tonic-gate } 19570Sstevel@tonic-gate 19580Sstevel@tonic-gate if (!page_try_demote_pages(pp)) { 19590Sstevel@tonic-gate MDSTAT_INCR(mhp, demotefail); 19600Sstevel@tonic-gate page_unlock(pp); 19610Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19620Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 19630Sstevel@tonic-gate start_pgrp; 19640Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19650Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19660Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19670Sstevel@tonic-gate continue; 19680Sstevel@tonic-gate } 19690Sstevel@tonic-gate 19700Sstevel@tonic-gate /* 19710Sstevel@tonic-gate * Regular 'page-out'. 19720Sstevel@tonic-gate */ 19730Sstevel@tonic-gate if (!mod) { 19740Sstevel@tonic-gate MDSTAT_INCR(mhp, ndestroy); 19750Sstevel@tonic-gate page_destroy(pp, 1); 19760Sstevel@tonic-gate /* 19770Sstevel@tonic-gate * page_destroy was called with 19780Sstevel@tonic-gate * dontfree. As long as p_lckcnt 19790Sstevel@tonic-gate * and p_cowcnt are both zero, the 19800Sstevel@tonic-gate * only additional action of 19810Sstevel@tonic-gate * page_destroy with !dontfree is to 19820Sstevel@tonic-gate * call page_free, so we can collect 19830Sstevel@tonic-gate * the page here. 19840Sstevel@tonic-gate */ 19850Sstevel@tonic-gate collected++; 19860Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19870Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 19880Sstevel@tonic-gate start_pgrp; 19890Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19900Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19910Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19920Sstevel@tonic-gate page_delete_collect(pp, mhp); 19930Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 19940Sstevel@tonic-gate (1 << (bit % NBPBMW)); 19950Sstevel@tonic-gate continue; 19960Sstevel@tonic-gate } 19970Sstevel@tonic-gate /* 19980Sstevel@tonic-gate * The page is toxic and the mod bit is 19990Sstevel@tonic-gate * set, we cannot do anything here to deal 20000Sstevel@tonic-gate * with it. 20010Sstevel@tonic-gate */ 2002*917Selowe if (PP_TOXIC(pp)) { 20030Sstevel@tonic-gate page_unlock(pp); 20040Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20050Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20060Sstevel@tonic-gate start_pgrp; 20070Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20080Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20090Sstevel@tonic-gate MDSTAT_INCR(mhp, modtoxic); 20100Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20110Sstevel@tonic-gate continue; 20120Sstevel@tonic-gate } 20130Sstevel@tonic-gate MDSTAT_INCR(mhp, nputpage); 20140Sstevel@tonic-gate vp = pp->p_vnode; 20150Sstevel@tonic-gate offset = pp->p_offset; 20160Sstevel@tonic-gate VN_HOLD(vp); 20170Sstevel@tonic-gate page_unlock(pp); 20180Sstevel@tonic-gate (void) VOP_PUTPAGE(vp, offset, PAGESIZE, 20190Sstevel@tonic-gate B_INVAL|B_FORCE, kcred); 20200Sstevel@tonic-gate VN_RELE(vp); 20210Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20220Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20230Sstevel@tonic-gate start_pgrp; 20240Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20250Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20260Sstevel@tonic-gate /* 20270Sstevel@tonic-gate * Try to get the page back immediately 20280Sstevel@tonic-gate * so that it can be collected. 20290Sstevel@tonic-gate */ 20300Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 20310Sstevel@tonic-gate if (pp == NULL) { 20320Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim); 20330Sstevel@tonic-gate /* 20340Sstevel@tonic-gate * This should not happen as this 20350Sstevel@tonic-gate * thread is deleting the page. 20360Sstevel@tonic-gate * If this code is generalized, this 20370Sstevel@tonic-gate * becomes a reality. 20380Sstevel@tonic-gate */ 20390Sstevel@tonic-gate #ifdef DEBUG 20400Sstevel@tonic-gate cmn_err(CE_WARN, 20410Sstevel@tonic-gate "delete_memory_thread(0x%p) " 20420Sstevel@tonic-gate "pfn 0x%lx has no page_t", 20430Sstevel@tonic-gate (void *)mhp, pfn); 20440Sstevel@tonic-gate #endif /* DEBUG */ 20450Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20460Sstevel@tonic-gate continue; 20470Sstevel@tonic-gate } 20480Sstevel@tonic-gate if (page_try_reclaim_lock(pp, SE_EXCL, 2049*917Selowe SE_EXCL_WANTED | SE_RETIRED)) { 20500Sstevel@tonic-gate if (PP_ISFREE(pp)) { 20510Sstevel@tonic-gate goto free_page_collect; 20520Sstevel@tonic-gate } 20530Sstevel@tonic-gate page_unlock(pp); 20540Sstevel@tonic-gate } 20550Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim); 20560Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20570Sstevel@tonic-gate continue; 20580Sstevel@tonic-gate 20590Sstevel@tonic-gate reloc: 20600Sstevel@tonic-gate /* 20610Sstevel@tonic-gate * Got some freemem and a target 20620Sstevel@tonic-gate * page, so move the data to avoid 20630Sstevel@tonic-gate * I/O and lock problems. 20640Sstevel@tonic-gate */ 20650Sstevel@tonic-gate ASSERT(!page_iolock_assert(pp)); 20660Sstevel@tonic-gate MDSTAT_INCR(mhp, nreloc); 20670Sstevel@tonic-gate /* 20680Sstevel@tonic-gate * page_relocate() will return pgcnt: the 20690Sstevel@tonic-gate * number of consecutive pages relocated. 20700Sstevel@tonic-gate * If it is successful, pp will be a 20710Sstevel@tonic-gate * linked list of the page structs that 20720Sstevel@tonic-gate * were relocated. If page_relocate() is 20730Sstevel@tonic-gate * unsuccessful, pp will be unmodified. 20740Sstevel@tonic-gate */ 20750Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20760Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 20770Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20780Sstevel@tonic-gate result = page_relocate(&pp, &pp_targ, 0, 0, 20790Sstevel@tonic-gate &pgcnt, NULL); 20800Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20810Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20820Sstevel@tonic-gate start_pgrp; 20830Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20840Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20850Sstevel@tonic-gate if (result != 0) { 20860Sstevel@tonic-gate MDSTAT_INCR(mhp, nrelocfail); 20870Sstevel@tonic-gate /* 20880Sstevel@tonic-gate * We did not succeed. We need 20890Sstevel@tonic-gate * to give the pp_targ pages back. 20900Sstevel@tonic-gate * page_free(pp_targ, 1) without 20910Sstevel@tonic-gate * the freemem accounting. 20920Sstevel@tonic-gate */ 20930Sstevel@tonic-gate group_page_unlock(pp); 20940Sstevel@tonic-gate page_free_replacement_page(pp_targ); 20950Sstevel@tonic-gate page_unlock(pp); 20960Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20970Sstevel@tonic-gate continue; 20980Sstevel@tonic-gate } 20990Sstevel@tonic-gate 21000Sstevel@tonic-gate /* 21010Sstevel@tonic-gate * We will then collect pgcnt pages. 21020Sstevel@tonic-gate */ 21030Sstevel@tonic-gate ASSERT(pgcnt > 0); 21040Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21050Sstevel@tonic-gate /* 21060Sstevel@tonic-gate * We need to make sure freemem_left is 21070Sstevel@tonic-gate * large enough. 21080Sstevel@tonic-gate */ 21090Sstevel@tonic-gate while ((freemem_left < pgcnt) && 21100Sstevel@tonic-gate (!mhp->mh_cancel)) { 21110Sstevel@tonic-gate freemem_left += 21120Sstevel@tonic-gate delthr_get_freemem(mhp); 21130Sstevel@tonic-gate } 21140Sstevel@tonic-gate 21150Sstevel@tonic-gate /* 21160Sstevel@tonic-gate * Do not proceed if mh_cancel is set. 21170Sstevel@tonic-gate */ 21180Sstevel@tonic-gate if (mhp->mh_cancel) { 21190Sstevel@tonic-gate while (pp_targ != NULL) { 21200Sstevel@tonic-gate /* 21210Sstevel@tonic-gate * Unlink and unlock each page. 21220Sstevel@tonic-gate */ 21230Sstevel@tonic-gate tpp_targ = pp_targ; 21240Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ); 21250Sstevel@tonic-gate page_unlock(tpp_targ); 21260Sstevel@tonic-gate } 21270Sstevel@tonic-gate /* 21280Sstevel@tonic-gate * We need to give the pp pages back. 21290Sstevel@tonic-gate * page_free(pp, 1) without the 21300Sstevel@tonic-gate * freemem accounting. 21310Sstevel@tonic-gate */ 21320Sstevel@tonic-gate page_free_replacement_page(pp); 21330Sstevel@tonic-gate break; 21340Sstevel@tonic-gate } 21350Sstevel@tonic-gate 21360Sstevel@tonic-gate /* Now remove pgcnt from freemem_left */ 21370Sstevel@tonic-gate freemem_left -= pgcnt; 21380Sstevel@tonic-gate ASSERT(freemem_left >= 0); 21390Sstevel@tonic-gate szc = pp->p_szc; 21400Sstevel@tonic-gate while (pp != NULL) { 21410Sstevel@tonic-gate /* 21420Sstevel@tonic-gate * pp and pp_targ were passed back as 21430Sstevel@tonic-gate * a linked list of pages. 21440Sstevel@tonic-gate * Unlink and unlock each page. 21450Sstevel@tonic-gate */ 21460Sstevel@tonic-gate tpp_targ = pp_targ; 21470Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ); 21480Sstevel@tonic-gate page_unlock(tpp_targ); 21490Sstevel@tonic-gate /* 21500Sstevel@tonic-gate * The original page is now free 21510Sstevel@tonic-gate * so remove it from the linked 21520Sstevel@tonic-gate * list and collect it. 21530Sstevel@tonic-gate */ 21540Sstevel@tonic-gate tpp = pp; 21550Sstevel@tonic-gate page_sub(&pp, tpp); 21560Sstevel@tonic-gate pfn = page_pptonum(tpp); 21570Sstevel@tonic-gate collected++; 21580Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp)); 21590Sstevel@tonic-gate ASSERT(tpp->p_vnode == NULL); 21600Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(tpp)); 21610Sstevel@tonic-gate ASSERT(tpp->p_szc == szc); 21620Sstevel@tonic-gate tpp->p_szc = 0; 21630Sstevel@tonic-gate page_delete_collect(tpp, mhp); 21640Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 21650Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 21660Sstevel@tonic-gate (1 << (bit % NBPBMW)); 21670Sstevel@tonic-gate } 21680Sstevel@tonic-gate ASSERT(pp_targ == NULL); 21690Sstevel@tonic-gate } 21700Sstevel@tonic-gate } 21710Sstevel@tonic-gate first_scan = 0; 21720Sstevel@tonic-gate if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) && 21730Sstevel@tonic-gate (collected == 0)) { 21740Sstevel@tonic-gate /* 21750Sstevel@tonic-gate * This code is needed as we cannot wait 21760Sstevel@tonic-gate * for a page to be locked OR the delete to 21770Sstevel@tonic-gate * be cancelled. Also, we must delay so 21780Sstevel@tonic-gate * that other threads get a chance to run 21790Sstevel@tonic-gate * on our cpu, otherwise page locks may be 21800Sstevel@tonic-gate * held indefinitely by those threads. 21810Sstevel@tonic-gate */ 21820Sstevel@tonic-gate MDSTAT_INCR(mhp, ndelay); 21830Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 21840Sstevel@tonic-gate (void) cv_timedwait(&mhp->mh_cv, &mhp->mh_mutex, 21850Sstevel@tonic-gate (lbolt + DEL_BUSY_WAIT_TICKS)); 21860Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex); 21870Sstevel@tonic-gate } 21880Sstevel@tonic-gate } 21890Sstevel@tonic-gate /* stop the dr aio cleanup thread */ 21900Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 1; 21910Sstevel@tonic-gate transit_list_collect(mhp, 0); 21920Sstevel@tonic-gate if (freemem_left != 0) { 21930Sstevel@tonic-gate /* Return any surplus. */ 21940Sstevel@tonic-gate page_create_putback(freemem_left); 21950Sstevel@tonic-gate freemem_left = 0; 21960Sstevel@tonic-gate } 21970Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 21980Sstevel@tonic-gate mdsp = mdsp->mds_next) { 21990Sstevel@tonic-gate mem_node_post_del_slice(mdsp->mds_base, 22000Sstevel@tonic-gate mdsp->mds_base + mdsp->mds_npgs - 1, 22010Sstevel@tonic-gate (mhp->mh_cancel != 0)); 22020Sstevel@tonic-gate } 22030Sstevel@tonic-gate #ifdef MEM_DEL_STATS 22040Sstevel@tonic-gate ntick_total = (uint64_t)ddi_get_lbolt() - start_total; 22050Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 22060Sstevel@tonic-gate MDSTAT_TOTAL(mhp, ntick_total); 22070Sstevel@tonic-gate MDSTAT_PRINT(mhp); 22080Sstevel@tonic-gate 22090Sstevel@tonic-gate /* 22100Sstevel@tonic-gate * If the memory delete was cancelled, exclusive-wanted bits must 2211*917Selowe * be cleared. If there are retired pages being deleted, they need 2212*917Selowe * to be unretired. 22130Sstevel@tonic-gate */ 22140Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 22150Sstevel@tonic-gate mdsp = mdsp->mds_next) { 22160Sstevel@tonic-gate pfn_t pfn, p_end; 22170Sstevel@tonic-gate 22180Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 22190Sstevel@tonic-gate for (pfn = mdsp->mds_base; pfn < p_end; pfn++) { 22200Sstevel@tonic-gate page_t *pp; 22210Sstevel@tonic-gate pgcnt_t bit; 22220Sstevel@tonic-gate 22230Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 22240Sstevel@tonic-gate if (mhp->mh_cancel) { 22250Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 22260Sstevel@tonic-gate if (pp != NULL) { 22270Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] & 22280Sstevel@tonic-gate (1 << (bit % NBPBMW))) == 0) { 22290Sstevel@tonic-gate page_lock_clr_exclwanted(pp); 22300Sstevel@tonic-gate } 22310Sstevel@tonic-gate } 22320Sstevel@tonic-gate } else { 22330Sstevel@tonic-gate pp = NULL; 22340Sstevel@tonic-gate } 22350Sstevel@tonic-gate if ((mdsp->mds_bitmap_retired[bit / NBPBMW] & 22360Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) { 22370Sstevel@tonic-gate /* do we already have pp? */ 22380Sstevel@tonic-gate if (pp == NULL) { 22390Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 22400Sstevel@tonic-gate } 22410Sstevel@tonic-gate ASSERT(pp != NULL); 2242*917Selowe ASSERT(PP_RETIRED(pp)); 22430Sstevel@tonic-gate if (mhp->mh_cancel != 0) { 2244*917Selowe page_unlock(pp); 22450Sstevel@tonic-gate /* 22460Sstevel@tonic-gate * To satisfy ASSERT below in 22470Sstevel@tonic-gate * cancel code. 22480Sstevel@tonic-gate */ 22490Sstevel@tonic-gate mhp->mh_hold_todo++; 22500Sstevel@tonic-gate } else { 2251*917Selowe (void) page_unretire_pp(pp, 0); 22520Sstevel@tonic-gate } 22530Sstevel@tonic-gate } 22540Sstevel@tonic-gate } 22550Sstevel@tonic-gate } 22560Sstevel@tonic-gate /* 22570Sstevel@tonic-gate * Free retired page bitmap and collected page bitmap 22580Sstevel@tonic-gate */ 22590Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 22600Sstevel@tonic-gate mdsp = mdsp->mds_next) { 22610Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap_retired != NULL); 22620Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp)); 22630Sstevel@tonic-gate mdsp->mds_bitmap_retired = NULL; /* Paranoia. */ 22640Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap != NULL); 22650Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp)); 22660Sstevel@tonic-gate mdsp->mds_bitmap = NULL; /* Paranoia. */ 22670Sstevel@tonic-gate } 22680Sstevel@tonic-gate 22690Sstevel@tonic-gate /* wait for our dr aio cancel thread to exit */ 22700Sstevel@tonic-gate while (!(mhp->mh_aio_cleanup_done)) { 22710Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 22720Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY)); 22730Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex); 22740Sstevel@tonic-gate } 22750Sstevel@tonic-gate refused: 22760Sstevel@tonic-gate if (mhp->mh_cancel != 0) { 22770Sstevel@tonic-gate page_t *pp; 22780Sstevel@tonic-gate 22790Sstevel@tonic-gate comp_code = mhp->mh_cancel; 22800Sstevel@tonic-gate /* 22810Sstevel@tonic-gate * Go through list of deleted pages (mh_deleted) freeing 22820Sstevel@tonic-gate * them. 22830Sstevel@tonic-gate */ 22840Sstevel@tonic-gate while ((pp = mhp->mh_deleted) != NULL) { 22850Sstevel@tonic-gate mhp->mh_deleted = pp->p_next; 22860Sstevel@tonic-gate mhp->mh_hold_todo++; 22870Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 22880Sstevel@tonic-gate /* Restore p_next. */ 22890Sstevel@tonic-gate pp->p_next = pp->p_prev; 22900Sstevel@tonic-gate if (PP_ISFREE(pp)) { 22910Sstevel@tonic-gate cmn_err(CE_PANIC, 22920Sstevel@tonic-gate "page %p is free", 22930Sstevel@tonic-gate (void *)pp); 22940Sstevel@tonic-gate } 22950Sstevel@tonic-gate page_free(pp, 1); 22960Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 22970Sstevel@tonic-gate } 22980Sstevel@tonic-gate ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages); 22990Sstevel@tonic-gate 23000Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23010Sstevel@tonic-gate put_availrmem(mhp->mh_vm_pages); 23020Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 23030Sstevel@tonic-gate 23040Sstevel@tonic-gate goto t_exit; 23050Sstevel@tonic-gate } 23060Sstevel@tonic-gate 23070Sstevel@tonic-gate /* 23080Sstevel@tonic-gate * All the pages are no longer in use and are exclusively locked. 23090Sstevel@tonic-gate */ 23100Sstevel@tonic-gate 23110Sstevel@tonic-gate mhp->mh_deleted = NULL; 23120Sstevel@tonic-gate 23130Sstevel@tonic-gate kphysm_del_cleanup(mhp); 23140Sstevel@tonic-gate 23150Sstevel@tonic-gate comp_code = KPHYSM_OK; 23160Sstevel@tonic-gate 23170Sstevel@tonic-gate t_exit: 23180Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23190Sstevel@tonic-gate kphysm_setup_post_del(mhp->mh_vm_pages, 23200Sstevel@tonic-gate (comp_code == KPHYSM_OK) ? 0 : 1); 23210Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 23220Sstevel@tonic-gate 23230Sstevel@tonic-gate early_exit: 23240Sstevel@tonic-gate /* mhp->mh_mutex exited by CALLB_CPR_EXIT() */ 23250Sstevel@tonic-gate mhp->mh_state = MHND_DONE; 23260Sstevel@tonic-gate del_complete_funcp = mhp->mh_delete_complete; 23270Sstevel@tonic-gate del_complete_arg = mhp->mh_delete_complete_arg; 23280Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 23290Sstevel@tonic-gate (*del_complete_funcp)(del_complete_arg, comp_code); 23300Sstevel@tonic-gate thread_exit(); 23310Sstevel@tonic-gate /*NOTREACHED*/ 23320Sstevel@tonic-gate } 23330Sstevel@tonic-gate 23340Sstevel@tonic-gate /* 23350Sstevel@tonic-gate * Start the delete of the memory from the system. 23360Sstevel@tonic-gate */ 23370Sstevel@tonic-gate int 23380Sstevel@tonic-gate kphysm_del_start( 23390Sstevel@tonic-gate memhandle_t handle, 23400Sstevel@tonic-gate void (*complete)(void *, int), 23410Sstevel@tonic-gate void *complete_arg) 23420Sstevel@tonic-gate { 23430Sstevel@tonic-gate struct mem_handle *mhp; 23440Sstevel@tonic-gate 23450Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 23460Sstevel@tonic-gate if (mhp == NULL) { 23470Sstevel@tonic-gate return (KPHYSM_EHANDLE); 23480Sstevel@tonic-gate } 23490Sstevel@tonic-gate switch (mhp->mh_state) { 23500Sstevel@tonic-gate case MHND_FREE: 23510Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 23520Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23530Sstevel@tonic-gate return (KPHYSM_EHANDLE); 23540Sstevel@tonic-gate case MHND_INIT: 23550Sstevel@tonic-gate break; 23560Sstevel@tonic-gate case MHND_STARTING: 23570Sstevel@tonic-gate case MHND_RUNNING: 23580Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23590Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 23600Sstevel@tonic-gate case MHND_DONE: 23610Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23620Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 23630Sstevel@tonic-gate case MHND_RELEASE: 23640Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23650Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 23660Sstevel@tonic-gate default: 23670Sstevel@tonic-gate #ifdef DEBUG 23680Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d", 23690Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 23700Sstevel@tonic-gate #endif /* DEBUG */ 23710Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23720Sstevel@tonic-gate return (KPHYSM_EHANDLE); 23730Sstevel@tonic-gate } 23740Sstevel@tonic-gate 23750Sstevel@tonic-gate if (mhp->mh_transit.trl_spans == NULL) { 23760Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23770Sstevel@tonic-gate return (KPHYSM_ENOWORK); 23780Sstevel@tonic-gate } 23790Sstevel@tonic-gate 23800Sstevel@tonic-gate ASSERT(complete != NULL); 23810Sstevel@tonic-gate mhp->mh_delete_complete = complete; 23820Sstevel@tonic-gate mhp->mh_delete_complete_arg = complete_arg; 23830Sstevel@tonic-gate mhp->mh_state = MHND_STARTING; 23840Sstevel@tonic-gate /* 23850Sstevel@tonic-gate * Release the mutex in case thread_create sleeps. 23860Sstevel@tonic-gate */ 23870Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23880Sstevel@tonic-gate 23890Sstevel@tonic-gate /* 23900Sstevel@tonic-gate * The "obvious" process for this thread is pageout (proc_pageout) 23910Sstevel@tonic-gate * but this gives the thread too much power over freemem 23920Sstevel@tonic-gate * which results in freemem starvation. 23930Sstevel@tonic-gate */ 23940Sstevel@tonic-gate (void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0, 23950Sstevel@tonic-gate TS_RUN, maxclsyspri - 1); 23960Sstevel@tonic-gate 23970Sstevel@tonic-gate return (KPHYSM_OK); 23980Sstevel@tonic-gate } 23990Sstevel@tonic-gate 24000Sstevel@tonic-gate static kmutex_t pp_dummy_lock; /* Protects init. of pp_dummy. */ 24010Sstevel@tonic-gate static caddr_t pp_dummy; 24020Sstevel@tonic-gate static pgcnt_t pp_dummy_npages; 24030Sstevel@tonic-gate static pfn_t *pp_dummy_pfn; /* Array of dummy pfns. */ 24040Sstevel@tonic-gate 24050Sstevel@tonic-gate static void 24060Sstevel@tonic-gate memseg_remap_init_pages(page_t *pages, page_t *epages) 24070Sstevel@tonic-gate { 24080Sstevel@tonic-gate page_t *pp; 24090Sstevel@tonic-gate 24100Sstevel@tonic-gate for (pp = pages; pp < epages; pp++) { 24110Sstevel@tonic-gate pp->p_pagenum = PFN_INVALID; /* XXXX */ 24120Sstevel@tonic-gate pp->p_offset = (u_offset_t)-1; 24130Sstevel@tonic-gate page_iolock_init(pp); 24140Sstevel@tonic-gate while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM)) 24150Sstevel@tonic-gate continue; 24160Sstevel@tonic-gate page_lock_delete(pp); 24170Sstevel@tonic-gate } 24180Sstevel@tonic-gate } 24190Sstevel@tonic-gate 24200Sstevel@tonic-gate void 24210Sstevel@tonic-gate memseg_remap_init() 24220Sstevel@tonic-gate { 24230Sstevel@tonic-gate mutex_enter(&pp_dummy_lock); 24240Sstevel@tonic-gate if (pp_dummy == NULL) { 24250Sstevel@tonic-gate uint_t dpages; 24260Sstevel@tonic-gate int i; 24270Sstevel@tonic-gate 24280Sstevel@tonic-gate /* 24290Sstevel@tonic-gate * dpages starts off as the size of the structure and 24300Sstevel@tonic-gate * ends up as the minimum number of pages that will 24310Sstevel@tonic-gate * hold a whole number of page_t structures. 24320Sstevel@tonic-gate */ 24330Sstevel@tonic-gate dpages = sizeof (page_t); 24340Sstevel@tonic-gate ASSERT(dpages != 0); 24350Sstevel@tonic-gate ASSERT(dpages <= MMU_PAGESIZE); 24360Sstevel@tonic-gate 24370Sstevel@tonic-gate while ((dpages & 1) == 0) 24380Sstevel@tonic-gate dpages >>= 1; 24390Sstevel@tonic-gate 24400Sstevel@tonic-gate pp_dummy_npages = dpages; 24410Sstevel@tonic-gate /* 24420Sstevel@tonic-gate * Allocate pp_dummy pages directly from static_arena, 24430Sstevel@tonic-gate * since these are whole page allocations and are 24440Sstevel@tonic-gate * referenced by physical address. This also has the 24450Sstevel@tonic-gate * nice fringe benefit of hiding the memory from 24460Sstevel@tonic-gate * ::findleaks since it doesn't deal well with allocated 24470Sstevel@tonic-gate * kernel heap memory that doesn't have any mappings. 24480Sstevel@tonic-gate */ 24490Sstevel@tonic-gate pp_dummy = vmem_xalloc(static_arena, ptob(pp_dummy_npages), 24500Sstevel@tonic-gate PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP); 24510Sstevel@tonic-gate bzero(pp_dummy, ptob(pp_dummy_npages)); 24520Sstevel@tonic-gate ASSERT(((uintptr_t)pp_dummy & MMU_PAGEOFFSET) == 0); 24530Sstevel@tonic-gate pp_dummy_pfn = kmem_alloc(sizeof (*pp_dummy_pfn) * 24540Sstevel@tonic-gate pp_dummy_npages, KM_SLEEP); 24550Sstevel@tonic-gate for (i = 0; i < pp_dummy_npages; i++) { 24560Sstevel@tonic-gate pp_dummy_pfn[i] = hat_getpfnum(kas.a_hat, 24570Sstevel@tonic-gate &pp_dummy[MMU_PAGESIZE * i]); 24580Sstevel@tonic-gate ASSERT(pp_dummy_pfn[i] != PFN_INVALID); 24590Sstevel@tonic-gate } 24600Sstevel@tonic-gate /* 24610Sstevel@tonic-gate * Initialize the page_t's to a known 'deleted' state 24620Sstevel@tonic-gate * that matches the state of deleted pages. 24630Sstevel@tonic-gate */ 24640Sstevel@tonic-gate memseg_remap_init_pages((page_t *)pp_dummy, 24650Sstevel@tonic-gate (page_t *)(pp_dummy + 24660Sstevel@tonic-gate ptob(pp_dummy_npages))); 24670Sstevel@tonic-gate /* Remove kmem mappings for the pages for safety. */ 24680Sstevel@tonic-gate hat_unload(kas.a_hat, pp_dummy, ptob(pp_dummy_npages), 24690Sstevel@tonic-gate HAT_UNLOAD_UNLOCK); 24700Sstevel@tonic-gate /* Leave pp_dummy pointer set as flag that init is done. */ 24710Sstevel@tonic-gate } 24720Sstevel@tonic-gate mutex_exit(&pp_dummy_lock); 24730Sstevel@tonic-gate } 24740Sstevel@tonic-gate 24750Sstevel@tonic-gate static void 24760Sstevel@tonic-gate memseg_remap_to_dummy(caddr_t pp, pgcnt_t metapgs) 24770Sstevel@tonic-gate { 24780Sstevel@tonic-gate ASSERT(pp_dummy != NULL); 24790Sstevel@tonic-gate 24800Sstevel@tonic-gate while (metapgs != 0) { 24810Sstevel@tonic-gate pgcnt_t n; 24820Sstevel@tonic-gate int i; 24830Sstevel@tonic-gate 24840Sstevel@tonic-gate n = pp_dummy_npages; 24850Sstevel@tonic-gate if (n > metapgs) 24860Sstevel@tonic-gate n = metapgs; 24870Sstevel@tonic-gate for (i = 0; i < n; i++) { 24880Sstevel@tonic-gate hat_devload(kas.a_hat, pp, ptob(1), pp_dummy_pfn[i], 24890Sstevel@tonic-gate PROT_READ, 24900Sstevel@tonic-gate HAT_LOAD | HAT_LOAD_NOCONSIST | 24910Sstevel@tonic-gate HAT_LOAD_REMAP); 24920Sstevel@tonic-gate pp += ptob(1); 24930Sstevel@tonic-gate } 24940Sstevel@tonic-gate metapgs -= n; 24950Sstevel@tonic-gate } 24960Sstevel@tonic-gate } 24970Sstevel@tonic-gate 24980Sstevel@tonic-gate /* 24990Sstevel@tonic-gate * Transition all the deleted pages to the deleted state so that 25000Sstevel@tonic-gate * page_lock will not wait. The page_lock_delete call will 25010Sstevel@tonic-gate * also wake up any waiters. 25020Sstevel@tonic-gate */ 25030Sstevel@tonic-gate static void 25040Sstevel@tonic-gate memseg_lock_delete_all(struct memseg *seg) 25050Sstevel@tonic-gate { 25060Sstevel@tonic-gate page_t *pp; 25070Sstevel@tonic-gate 25080Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) { 25090Sstevel@tonic-gate pp->p_pagenum = PFN_INVALID; /* XXXX */ 25100Sstevel@tonic-gate page_lock_delete(pp); 25110Sstevel@tonic-gate } 25120Sstevel@tonic-gate } 25130Sstevel@tonic-gate 25140Sstevel@tonic-gate static void 25150Sstevel@tonic-gate kphysm_del_cleanup(struct mem_handle *mhp) 25160Sstevel@tonic-gate { 25170Sstevel@tonic-gate struct memdelspan *mdsp; 25180Sstevel@tonic-gate struct memseg *seg; 25190Sstevel@tonic-gate struct memseg **segpp; 25200Sstevel@tonic-gate struct memseg *seglist; 25210Sstevel@tonic-gate pfn_t p_end; 25220Sstevel@tonic-gate uint64_t avmem; 25230Sstevel@tonic-gate pgcnt_t avpgs; 25240Sstevel@tonic-gate pgcnt_t npgs; 25250Sstevel@tonic-gate 25260Sstevel@tonic-gate avpgs = mhp->mh_vm_pages; 25270Sstevel@tonic-gate 25280Sstevel@tonic-gate memsegs_lock(1); 25290Sstevel@tonic-gate 25300Sstevel@tonic-gate /* 25310Sstevel@tonic-gate * remove from main segment list. 25320Sstevel@tonic-gate */ 25330Sstevel@tonic-gate npgs = 0; 25340Sstevel@tonic-gate seglist = NULL; 25350Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 25360Sstevel@tonic-gate mdsp = mdsp->mds_next) { 25370Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 25380Sstevel@tonic-gate for (segpp = &memsegs; (seg = *segpp) != NULL; ) { 25390Sstevel@tonic-gate if (seg->pages_base >= p_end || 25400Sstevel@tonic-gate seg->pages_end <= mdsp->mds_base) { 25410Sstevel@tonic-gate /* Span and memseg don't overlap. */ 25420Sstevel@tonic-gate segpp = &((*segpp)->next); 25430Sstevel@tonic-gate continue; 25440Sstevel@tonic-gate } 25450Sstevel@tonic-gate ASSERT(seg->pages_base >= mdsp->mds_base); 25460Sstevel@tonic-gate ASSERT(seg->pages_end <= p_end); 25470Sstevel@tonic-gate 25480Sstevel@tonic-gate /* Hide the memseg from future scans. */ 25490Sstevel@tonic-gate hat_kpm_delmem_mseg_update(seg, segpp); 25500Sstevel@tonic-gate *segpp = seg->next; 25510Sstevel@tonic-gate membar_producer(); /* TODO: Needed? */ 25520Sstevel@tonic-gate npgs += MSEG_NPAGES(seg); 25530Sstevel@tonic-gate 25540Sstevel@tonic-gate /* 25550Sstevel@tonic-gate * Leave the deleted segment's next pointer intact 25560Sstevel@tonic-gate * in case a memsegs scanning loop is walking this 25570Sstevel@tonic-gate * segment concurrently. 25580Sstevel@tonic-gate */ 25590Sstevel@tonic-gate seg->lnext = seglist; 25600Sstevel@tonic-gate seglist = seg; 25610Sstevel@tonic-gate } 25620Sstevel@tonic-gate } 25630Sstevel@tonic-gate 25640Sstevel@tonic-gate build_pfn_hash(); 25650Sstevel@tonic-gate 25660Sstevel@tonic-gate ASSERT(npgs < total_pages); 25670Sstevel@tonic-gate total_pages -= npgs; 25680Sstevel@tonic-gate 25690Sstevel@tonic-gate /* 25700Sstevel@tonic-gate * Recalculate the paging parameters now total_pages has changed. 25710Sstevel@tonic-gate * This will also cause the clock hands to be reset before next use. 25720Sstevel@tonic-gate */ 25730Sstevel@tonic-gate setupclock(1); 25740Sstevel@tonic-gate 25750Sstevel@tonic-gate memsegs_unlock(1); 25760Sstevel@tonic-gate 25770Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 25780Sstevel@tonic-gate 25790Sstevel@tonic-gate while ((seg = seglist) != NULL) { 25800Sstevel@tonic-gate pfn_t mseg_start; 25810Sstevel@tonic-gate pfn_t mseg_base, mseg_end; 25820Sstevel@tonic-gate pgcnt_t mseg_npgs; 25830Sstevel@tonic-gate page_t *pp; 25840Sstevel@tonic-gate pgcnt_t metapgs; 25850Sstevel@tonic-gate int dynamic; 25860Sstevel@tonic-gate int mlret; 25870Sstevel@tonic-gate 25880Sstevel@tonic-gate seglist = seg->lnext; 25890Sstevel@tonic-gate 25900Sstevel@tonic-gate /* 25910Sstevel@tonic-gate * Put the page_t's into the deleted state to stop 25920Sstevel@tonic-gate * cv_wait()s on the pages. When we remap, the dummy 25930Sstevel@tonic-gate * page_t's will be in the same state. 25940Sstevel@tonic-gate */ 25950Sstevel@tonic-gate memseg_lock_delete_all(seg); 25960Sstevel@tonic-gate /* 25970Sstevel@tonic-gate * Collect up information based on pages_base and pages_end 25980Sstevel@tonic-gate * early so that we can flag early that the memseg has been 25990Sstevel@tonic-gate * deleted by setting pages_end == pages_base. 26000Sstevel@tonic-gate */ 26010Sstevel@tonic-gate mseg_base = seg->pages_base; 26020Sstevel@tonic-gate mseg_end = seg->pages_end; 26030Sstevel@tonic-gate mseg_npgs = MSEG_NPAGES(seg); 26040Sstevel@tonic-gate dynamic = memseg_is_dynamic(seg, &mseg_start); 26050Sstevel@tonic-gate 26060Sstevel@tonic-gate seg->pages_end = seg->pages_base; 26070Sstevel@tonic-gate 26080Sstevel@tonic-gate if (dynamic) { 26090Sstevel@tonic-gate pp = seg->pages; 26100Sstevel@tonic-gate metapgs = mseg_base - mseg_start; 26110Sstevel@tonic-gate ASSERT(metapgs != 0); 26120Sstevel@tonic-gate 26130Sstevel@tonic-gate /* Remap the meta data to our special dummy area. */ 26140Sstevel@tonic-gate memseg_remap_to_dummy((caddr_t)pp, metapgs); 26150Sstevel@tonic-gate 26160Sstevel@tonic-gate mutex_enter(&memseg_lists_lock); 26170Sstevel@tonic-gate seg->lnext = memseg_va_avail; 26180Sstevel@tonic-gate memseg_va_avail = seg; 26190Sstevel@tonic-gate mutex_exit(&memseg_lists_lock); 26200Sstevel@tonic-gate } else { 26210Sstevel@tonic-gate /* 26220Sstevel@tonic-gate * Set for clean-up below. 26230Sstevel@tonic-gate */ 26240Sstevel@tonic-gate mseg_start = seg->pages_base; 26250Sstevel@tonic-gate /* 26260Sstevel@tonic-gate * For memory whose page_ts were allocated 26270Sstevel@tonic-gate * at boot, we need to find a new use for 26280Sstevel@tonic-gate * the page_t memory. 26290Sstevel@tonic-gate * For the moment, just leak it. 26300Sstevel@tonic-gate * (It is held in the memseg_delete_junk list.) 26310Sstevel@tonic-gate */ 26320Sstevel@tonic-gate 26330Sstevel@tonic-gate mutex_enter(&memseg_lists_lock); 26340Sstevel@tonic-gate seg->lnext = memseg_delete_junk; 26350Sstevel@tonic-gate memseg_delete_junk = seg; 26360Sstevel@tonic-gate mutex_exit(&memseg_lists_lock); 26370Sstevel@tonic-gate } 26380Sstevel@tonic-gate 26390Sstevel@tonic-gate /* Must not use seg now as it could be re-used. */ 26400Sstevel@tonic-gate 26410Sstevel@tonic-gate memlist_write_lock(); 26420Sstevel@tonic-gate 26430Sstevel@tonic-gate mlret = memlist_delete_span( 26440Sstevel@tonic-gate (uint64_t)(mseg_base) << PAGESHIFT, 26450Sstevel@tonic-gate (uint64_t)(mseg_npgs) << PAGESHIFT, 26460Sstevel@tonic-gate &phys_avail); 26470Sstevel@tonic-gate ASSERT(mlret == MEML_SPANOP_OK); 26480Sstevel@tonic-gate 26490Sstevel@tonic-gate mlret = memlist_delete_span( 26500Sstevel@tonic-gate (uint64_t)(mseg_start) << PAGESHIFT, 26510Sstevel@tonic-gate (uint64_t)(mseg_end - mseg_start) << 26520Sstevel@tonic-gate PAGESHIFT, 26530Sstevel@tonic-gate &phys_install); 26540Sstevel@tonic-gate ASSERT(mlret == MEML_SPANOP_OK); 26550Sstevel@tonic-gate phys_install_has_changed(); 26560Sstevel@tonic-gate 26570Sstevel@tonic-gate memlist_write_unlock(); 26580Sstevel@tonic-gate } 26590Sstevel@tonic-gate 26600Sstevel@tonic-gate memlist_read_lock(); 26610Sstevel@tonic-gate installed_top_size(phys_install, &physmax, &physinstalled); 26620Sstevel@tonic-gate memlist_read_unlock(); 26630Sstevel@tonic-gate 26640Sstevel@tonic-gate mutex_enter(&freemem_lock); 26650Sstevel@tonic-gate maxmem -= avpgs; 26660Sstevel@tonic-gate physmem -= avpgs; 26670Sstevel@tonic-gate /* availrmem is adjusted during the delete. */ 26680Sstevel@tonic-gate availrmem_initial -= avpgs; 26690Sstevel@tonic-gate 26700Sstevel@tonic-gate mutex_exit(&freemem_lock); 26710Sstevel@tonic-gate 26720Sstevel@tonic-gate dump_resize(); 26730Sstevel@tonic-gate 26740Sstevel@tonic-gate cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK " 26750Sstevel@tonic-gate "(0x%" PRIx64 ")\n", 26760Sstevel@tonic-gate physinstalled << (PAGESHIFT - 10), 26770Sstevel@tonic-gate (uint64_t)physinstalled << PAGESHIFT); 26780Sstevel@tonic-gate 26790Sstevel@tonic-gate avmem = (uint64_t)freemem << PAGESHIFT; 26800Sstevel@tonic-gate cmn_err(CE_CONT, "?kphysm_delete: " 26810Sstevel@tonic-gate "avail mem = %" PRId64 "\n", avmem); 26820Sstevel@tonic-gate 26830Sstevel@tonic-gate /* 26840Sstevel@tonic-gate * Update lgroup generation number on single lgroup systems 26850Sstevel@tonic-gate */ 26860Sstevel@tonic-gate if (nlgrps == 1) 26870Sstevel@tonic-gate lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0); 26880Sstevel@tonic-gate 26890Sstevel@tonic-gate /* Successfully deleted system memory */ 26900Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 26910Sstevel@tonic-gate } 26920Sstevel@tonic-gate 26930Sstevel@tonic-gate static uint_t mdel_nullvp_waiter; 26940Sstevel@tonic-gate 26950Sstevel@tonic-gate static void 26960Sstevel@tonic-gate page_delete_collect( 26970Sstevel@tonic-gate page_t *pp, 26980Sstevel@tonic-gate struct mem_handle *mhp) 26990Sstevel@tonic-gate { 27000Sstevel@tonic-gate if (pp->p_vnode) { 27010Sstevel@tonic-gate page_hashout(pp, (kmutex_t *)NULL); 27020Sstevel@tonic-gate /* do not do PP_SETAGED(pp); */ 27030Sstevel@tonic-gate } else { 27040Sstevel@tonic-gate kmutex_t *sep; 27050Sstevel@tonic-gate 27060Sstevel@tonic-gate sep = page_se_mutex(pp); 27070Sstevel@tonic-gate mutex_enter(sep); 27080Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv)) { 27090Sstevel@tonic-gate mdel_nullvp_waiter++; 27100Sstevel@tonic-gate cv_broadcast(&pp->p_cv); 27110Sstevel@tonic-gate } 27120Sstevel@tonic-gate mutex_exit(sep); 27130Sstevel@tonic-gate } 27140Sstevel@tonic-gate ASSERT(pp->p_next == pp->p_prev); 27150Sstevel@tonic-gate ASSERT(pp->p_next == NULL || pp->p_next == pp); 27160Sstevel@tonic-gate pp->p_next = mhp->mh_deleted; 27170Sstevel@tonic-gate mhp->mh_deleted = pp; 27180Sstevel@tonic-gate ASSERT(mhp->mh_hold_todo != 0); 27190Sstevel@tonic-gate mhp->mh_hold_todo--; 27200Sstevel@tonic-gate } 27210Sstevel@tonic-gate 27220Sstevel@tonic-gate static void 27230Sstevel@tonic-gate transit_list_collect(struct mem_handle *mhp, int v) 27240Sstevel@tonic-gate { 27250Sstevel@tonic-gate struct transit_list_head *trh; 27260Sstevel@tonic-gate 27270Sstevel@tonic-gate trh = &transit_list_head; 27280Sstevel@tonic-gate mutex_enter(&trh->trh_lock); 27290Sstevel@tonic-gate mhp->mh_transit.trl_collect = v; 27300Sstevel@tonic-gate mutex_exit(&trh->trh_lock); 27310Sstevel@tonic-gate } 27320Sstevel@tonic-gate 27330Sstevel@tonic-gate static void 27340Sstevel@tonic-gate transit_list_insert(struct transit_list *tlp) 27350Sstevel@tonic-gate { 27360Sstevel@tonic-gate struct transit_list_head *trh; 27370Sstevel@tonic-gate 27380Sstevel@tonic-gate trh = &transit_list_head; 27390Sstevel@tonic-gate ASSERT(MUTEX_HELD(&trh->trh_lock)); 27400Sstevel@tonic-gate tlp->trl_next = trh->trh_head; 27410Sstevel@tonic-gate trh->trh_head = tlp; 27420Sstevel@tonic-gate } 27430Sstevel@tonic-gate 27440Sstevel@tonic-gate static void 27450Sstevel@tonic-gate transit_list_remove(struct transit_list *tlp) 27460Sstevel@tonic-gate { 27470Sstevel@tonic-gate struct transit_list_head *trh; 27480Sstevel@tonic-gate struct transit_list **tlpp; 27490Sstevel@tonic-gate 27500Sstevel@tonic-gate trh = &transit_list_head; 27510Sstevel@tonic-gate tlpp = &trh->trh_head; 27520Sstevel@tonic-gate ASSERT(MUTEX_HELD(&trh->trh_lock)); 27530Sstevel@tonic-gate while (*tlpp != NULL && *tlpp != tlp) 27540Sstevel@tonic-gate tlpp = &(*tlpp)->trl_next; 27550Sstevel@tonic-gate ASSERT(*tlpp != NULL); 27560Sstevel@tonic-gate if (*tlpp == tlp) 27570Sstevel@tonic-gate *tlpp = tlp->trl_next; 27580Sstevel@tonic-gate tlp->trl_next = NULL; 27590Sstevel@tonic-gate } 27600Sstevel@tonic-gate 27610Sstevel@tonic-gate static struct transit_list * 27620Sstevel@tonic-gate pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum) 27630Sstevel@tonic-gate { 27640Sstevel@tonic-gate struct transit_list *tlp; 27650Sstevel@tonic-gate 27660Sstevel@tonic-gate for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) { 27670Sstevel@tonic-gate struct memdelspan *mdsp; 27680Sstevel@tonic-gate 27690Sstevel@tonic-gate for (mdsp = tlp->trl_spans; mdsp != NULL; 27700Sstevel@tonic-gate mdsp = mdsp->mds_next) { 27710Sstevel@tonic-gate if (pfnum >= mdsp->mds_base && 27720Sstevel@tonic-gate pfnum < (mdsp->mds_base + mdsp->mds_npgs)) { 27730Sstevel@tonic-gate return (tlp); 27740Sstevel@tonic-gate } 27750Sstevel@tonic-gate } 27760Sstevel@tonic-gate } 27770Sstevel@tonic-gate return (NULL); 27780Sstevel@tonic-gate } 27790Sstevel@tonic-gate 27800Sstevel@tonic-gate int 27810Sstevel@tonic-gate pfn_is_being_deleted(pfn_t pfnum) 27820Sstevel@tonic-gate { 27830Sstevel@tonic-gate struct transit_list_head *trh; 27840Sstevel@tonic-gate struct transit_list *tlp; 27850Sstevel@tonic-gate int ret; 27860Sstevel@tonic-gate 27870Sstevel@tonic-gate trh = &transit_list_head; 27880Sstevel@tonic-gate if (trh->trh_head == NULL) 27890Sstevel@tonic-gate return (0); 27900Sstevel@tonic-gate 27910Sstevel@tonic-gate mutex_enter(&trh->trh_lock); 27920Sstevel@tonic-gate tlp = pfnum_to_transit_list(trh, pfnum); 27930Sstevel@tonic-gate ret = (tlp != NULL && tlp->trl_collect); 27940Sstevel@tonic-gate mutex_exit(&trh->trh_lock); 27950Sstevel@tonic-gate 27960Sstevel@tonic-gate return (ret); 27970Sstevel@tonic-gate } 27980Sstevel@tonic-gate 27990Sstevel@tonic-gate #ifdef MEM_DEL_STATS 28000Sstevel@tonic-gate extern int hz; 28010Sstevel@tonic-gate static void 28020Sstevel@tonic-gate mem_del_stat_print_func(struct mem_handle *mhp) 28030Sstevel@tonic-gate { 28040Sstevel@tonic-gate uint64_t tmp; 28050Sstevel@tonic-gate 28060Sstevel@tonic-gate if (mem_del_stat_print) { 28070Sstevel@tonic-gate printf("memory delete loop %x/%x, statistics%s\n", 28080Sstevel@tonic-gate (uint_t)mhp->mh_transit.trl_spans->mds_base, 28090Sstevel@tonic-gate (uint_t)mhp->mh_transit.trl_spans->mds_npgs, 28100Sstevel@tonic-gate (mhp->mh_cancel ? " (cancelled)" : "")); 28110Sstevel@tonic-gate printf("\t%8u nloop\n", mhp->mh_delstat.nloop); 28120Sstevel@tonic-gate printf("\t%8u need_free\n", mhp->mh_delstat.need_free); 28130Sstevel@tonic-gate printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop); 28140Sstevel@tonic-gate printf("\t%8u free_low\n", mhp->mh_delstat.free_low); 28150Sstevel@tonic-gate printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed); 28160Sstevel@tonic-gate printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck); 28170Sstevel@tonic-gate printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget); 28180Sstevel@tonic-gate printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail); 28190Sstevel@tonic-gate printf("\t%8u nfree\n", mhp->mh_delstat.nfree); 28200Sstevel@tonic-gate printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc); 28210Sstevel@tonic-gate printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail); 28220Sstevel@tonic-gate printf("\t%8u already_done\n", mhp->mh_delstat.already_done); 28230Sstevel@tonic-gate printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree); 28240Sstevel@tonic-gate printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked); 28250Sstevel@tonic-gate printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc); 28260Sstevel@tonic-gate printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl); 28270Sstevel@tonic-gate printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc); 28280Sstevel@tonic-gate printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy); 28290Sstevel@tonic-gate printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage); 28300Sstevel@tonic-gate printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim); 28310Sstevel@tonic-gate printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay); 28320Sstevel@tonic-gate printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail); 28330Sstevel@tonic-gate printf("\t%8u retired\n", mhp->mh_delstat.retired); 28340Sstevel@tonic-gate printf("\t%8u toxic\n", mhp->mh_delstat.toxic); 28350Sstevel@tonic-gate printf("\t%8u failing\n", mhp->mh_delstat.failing); 28360Sstevel@tonic-gate printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic); 28370Sstevel@tonic-gate printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic); 28380Sstevel@tonic-gate printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail); 28390Sstevel@tonic-gate printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail); 28400Sstevel@tonic-gate tmp = mhp->mh_delstat.nticks_total / hz; /* seconds */ 28410Sstevel@tonic-gate printf( 28420Sstevel@tonic-gate "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n", 28430Sstevel@tonic-gate mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60); 28440Sstevel@tonic-gate 28450Sstevel@tonic-gate tmp = mhp->mh_delstat.nticks_pgrp / hz; /* seconds */ 28460Sstevel@tonic-gate printf( 28470Sstevel@tonic-gate "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n", 28480Sstevel@tonic-gate mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60); 28490Sstevel@tonic-gate } 28500Sstevel@tonic-gate } 28510Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 28520Sstevel@tonic-gate 28530Sstevel@tonic-gate struct mem_callback { 28540Sstevel@tonic-gate kphysm_setup_vector_t *vec; 28550Sstevel@tonic-gate void *arg; 28560Sstevel@tonic-gate }; 28570Sstevel@tonic-gate 28580Sstevel@tonic-gate #define NMEMCALLBACKS 100 28590Sstevel@tonic-gate 28600Sstevel@tonic-gate static struct mem_callback mem_callbacks[NMEMCALLBACKS]; 28610Sstevel@tonic-gate static uint_t nmemcallbacks; 28620Sstevel@tonic-gate static krwlock_t mem_callback_rwlock; 28630Sstevel@tonic-gate 28640Sstevel@tonic-gate int 28650Sstevel@tonic-gate kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg) 28660Sstevel@tonic-gate { 28670Sstevel@tonic-gate uint_t i, found; 28680Sstevel@tonic-gate 28690Sstevel@tonic-gate /* 28700Sstevel@tonic-gate * This test will become more complicated when the version must 28710Sstevel@tonic-gate * change. 28720Sstevel@tonic-gate */ 28730Sstevel@tonic-gate if (vec->version != KPHYSM_SETUP_VECTOR_VERSION) 28740Sstevel@tonic-gate return (EINVAL); 28750Sstevel@tonic-gate 28760Sstevel@tonic-gate if (vec->post_add == NULL || vec->pre_del == NULL || 28770Sstevel@tonic-gate vec->post_del == NULL) 28780Sstevel@tonic-gate return (EINVAL); 28790Sstevel@tonic-gate 28800Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_WRITER); 28810Sstevel@tonic-gate for (i = 0, found = 0; i < nmemcallbacks; i++) { 28820Sstevel@tonic-gate if (mem_callbacks[i].vec == NULL && found == 0) 28830Sstevel@tonic-gate found = i + 1; 28840Sstevel@tonic-gate if (mem_callbacks[i].vec == vec && 28850Sstevel@tonic-gate mem_callbacks[i].arg == arg) { 28860Sstevel@tonic-gate #ifdef DEBUG 28870Sstevel@tonic-gate /* Catch this in DEBUG kernels. */ 28880Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_setup_func_register" 28890Sstevel@tonic-gate "(0x%p, 0x%p) duplicate registration from 0x%p", 28900Sstevel@tonic-gate (void *)vec, arg, (void *)caller()); 28910Sstevel@tonic-gate #endif /* DEBUG */ 28920Sstevel@tonic-gate rw_exit(&mem_callback_rwlock); 28930Sstevel@tonic-gate return (EEXIST); 28940Sstevel@tonic-gate } 28950Sstevel@tonic-gate } 28960Sstevel@tonic-gate if (found != 0) { 28970Sstevel@tonic-gate i = found - 1; 28980Sstevel@tonic-gate } else { 28990Sstevel@tonic-gate ASSERT(nmemcallbacks < NMEMCALLBACKS); 29000Sstevel@tonic-gate if (nmemcallbacks == NMEMCALLBACKS) { 29010Sstevel@tonic-gate rw_exit(&mem_callback_rwlock); 29020Sstevel@tonic-gate return (ENOMEM); 29030Sstevel@tonic-gate } 29040Sstevel@tonic-gate i = nmemcallbacks++; 29050Sstevel@tonic-gate } 29060Sstevel@tonic-gate mem_callbacks[i].vec = vec; 29070Sstevel@tonic-gate mem_callbacks[i].arg = arg; 29080Sstevel@tonic-gate rw_exit(&mem_callback_rwlock); 29090Sstevel@tonic-gate return (0); 29100Sstevel@tonic-gate } 29110Sstevel@tonic-gate 29120Sstevel@tonic-gate void 29130Sstevel@tonic-gate kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg) 29140Sstevel@tonic-gate { 29150Sstevel@tonic-gate uint_t i; 29160Sstevel@tonic-gate 29170Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_WRITER); 29180Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) { 29190Sstevel@tonic-gate if (mem_callbacks[i].vec == vec && 29200Sstevel@tonic-gate mem_callbacks[i].arg == arg) { 29210Sstevel@tonic-gate mem_callbacks[i].vec = NULL; 29220Sstevel@tonic-gate mem_callbacks[i].arg = NULL; 29230Sstevel@tonic-gate if (i == (nmemcallbacks - 1)) 29240Sstevel@tonic-gate nmemcallbacks--; 29250Sstevel@tonic-gate break; 29260Sstevel@tonic-gate } 29270Sstevel@tonic-gate } 29280Sstevel@tonic-gate rw_exit(&mem_callback_rwlock); 29290Sstevel@tonic-gate } 29300Sstevel@tonic-gate 29310Sstevel@tonic-gate static void 29320Sstevel@tonic-gate kphysm_setup_post_add(pgcnt_t delta_pages) 29330Sstevel@tonic-gate { 29340Sstevel@tonic-gate uint_t i; 29350Sstevel@tonic-gate 29360Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_READER); 29370Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) { 29380Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) { 29390Sstevel@tonic-gate (*mem_callbacks[i].vec->post_add) 29400Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages); 29410Sstevel@tonic-gate } 29420Sstevel@tonic-gate } 29430Sstevel@tonic-gate rw_exit(&mem_callback_rwlock); 29440Sstevel@tonic-gate } 29450Sstevel@tonic-gate 29460Sstevel@tonic-gate /* 29470Sstevel@tonic-gate * Note the locking between pre_del and post_del: The reader lock is held 29480Sstevel@tonic-gate * between the two calls to stop the set of functions from changing. 29490Sstevel@tonic-gate */ 29500Sstevel@tonic-gate 29510Sstevel@tonic-gate static int 29520Sstevel@tonic-gate kphysm_setup_pre_del(pgcnt_t delta_pages) 29530Sstevel@tonic-gate { 29540Sstevel@tonic-gate uint_t i; 29550Sstevel@tonic-gate int ret; 29560Sstevel@tonic-gate int aret; 29570Sstevel@tonic-gate 29580Sstevel@tonic-gate ret = 0; 29590Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_READER); 29600Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) { 29610Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) { 29620Sstevel@tonic-gate aret = (*mem_callbacks[i].vec->pre_del) 29630Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages); 29640Sstevel@tonic-gate ret |= aret; 29650Sstevel@tonic-gate } 29660Sstevel@tonic-gate } 29670Sstevel@tonic-gate 29680Sstevel@tonic-gate return (ret); 29690Sstevel@tonic-gate } 29700Sstevel@tonic-gate 29710Sstevel@tonic-gate static void 29720Sstevel@tonic-gate kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled) 29730Sstevel@tonic-gate { 29740Sstevel@tonic-gate uint_t i; 29750Sstevel@tonic-gate 29760Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) { 29770Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) { 29780Sstevel@tonic-gate (*mem_callbacks[i].vec->post_del) 29790Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages, cancelled); 29800Sstevel@tonic-gate } 29810Sstevel@tonic-gate } 29820Sstevel@tonic-gate rw_exit(&mem_callback_rwlock); 29830Sstevel@tonic-gate } 29840Sstevel@tonic-gate 29850Sstevel@tonic-gate static int 29860Sstevel@tonic-gate kphysm_split_memseg( 29870Sstevel@tonic-gate pfn_t base, 29880Sstevel@tonic-gate pgcnt_t npgs) 29890Sstevel@tonic-gate { 29900Sstevel@tonic-gate struct memseg *seg; 29910Sstevel@tonic-gate struct memseg **segpp; 29920Sstevel@tonic-gate pgcnt_t size_low, size_high; 29930Sstevel@tonic-gate struct memseg *seg_low, *seg_mid, *seg_high; 29940Sstevel@tonic-gate 29950Sstevel@tonic-gate /* 29960Sstevel@tonic-gate * Lock the memsegs list against other updates now 29970Sstevel@tonic-gate */ 29980Sstevel@tonic-gate memsegs_lock(1); 29990Sstevel@tonic-gate 30000Sstevel@tonic-gate /* 30010Sstevel@tonic-gate * Find boot time memseg that wholly covers this area. 30020Sstevel@tonic-gate */ 30030Sstevel@tonic-gate 30040Sstevel@tonic-gate /* First find the memseg with page 'base' in it. */ 30050Sstevel@tonic-gate for (segpp = &memsegs; (seg = *segpp) != NULL; 30060Sstevel@tonic-gate segpp = &((*segpp)->next)) { 30070Sstevel@tonic-gate if (base >= seg->pages_base && base < seg->pages_end) 30080Sstevel@tonic-gate break; 30090Sstevel@tonic-gate } 30100Sstevel@tonic-gate if (seg == NULL) { 30110Sstevel@tonic-gate memsegs_unlock(1); 30120Sstevel@tonic-gate return (0); 30130Sstevel@tonic-gate } 30140Sstevel@tonic-gate if (memseg_is_dynamic(seg, (pfn_t *)NULL)) { 30150Sstevel@tonic-gate memsegs_unlock(1); 30160Sstevel@tonic-gate return (0); 30170Sstevel@tonic-gate } 30180Sstevel@tonic-gate if ((base + npgs) > seg->pages_end) { 30190Sstevel@tonic-gate memsegs_unlock(1); 30200Sstevel@tonic-gate return (0); 30210Sstevel@tonic-gate } 30220Sstevel@tonic-gate 30230Sstevel@tonic-gate /* 30240Sstevel@tonic-gate * Work out the size of the two segments that will 30250Sstevel@tonic-gate * surround the new segment, one for low address 30260Sstevel@tonic-gate * and one for high. 30270Sstevel@tonic-gate */ 30280Sstevel@tonic-gate ASSERT(base >= seg->pages_base); 30290Sstevel@tonic-gate size_low = base - seg->pages_base; 30300Sstevel@tonic-gate ASSERT(seg->pages_end >= (base + npgs)); 30310Sstevel@tonic-gate size_high = seg->pages_end - (base + npgs); 30320Sstevel@tonic-gate 30330Sstevel@tonic-gate /* 30340Sstevel@tonic-gate * Sanity check. 30350Sstevel@tonic-gate */ 30360Sstevel@tonic-gate if ((size_low + size_high) == 0) { 30370Sstevel@tonic-gate memsegs_unlock(1); 30380Sstevel@tonic-gate return (0); 30390Sstevel@tonic-gate } 30400Sstevel@tonic-gate 30410Sstevel@tonic-gate /* 30420Sstevel@tonic-gate * Allocate the new structures. The old memseg will not be freed 30430Sstevel@tonic-gate * as there may be a reference to it. 30440Sstevel@tonic-gate */ 30450Sstevel@tonic-gate seg_low = NULL; 30460Sstevel@tonic-gate seg_high = NULL; 30470Sstevel@tonic-gate 30480Sstevel@tonic-gate if (size_low != 0) { 30490Sstevel@tonic-gate seg_low = kmem_cache_alloc(memseg_cache, KM_SLEEP); 30500Sstevel@tonic-gate bzero(seg_low, sizeof (struct memseg)); 30510Sstevel@tonic-gate } 30520Sstevel@tonic-gate 30530Sstevel@tonic-gate seg_mid = kmem_cache_alloc(memseg_cache, KM_SLEEP); 30540Sstevel@tonic-gate bzero(seg_mid, sizeof (struct memseg)); 30550Sstevel@tonic-gate 30560Sstevel@tonic-gate if (size_high != 0) { 30570Sstevel@tonic-gate seg_high = kmem_cache_alloc(memseg_cache, KM_SLEEP); 30580Sstevel@tonic-gate bzero(seg_high, sizeof (struct memseg)); 30590Sstevel@tonic-gate } 30600Sstevel@tonic-gate 30610Sstevel@tonic-gate /* 30620Sstevel@tonic-gate * All allocation done now. 30630Sstevel@tonic-gate */ 30640Sstevel@tonic-gate if (size_low != 0) { 30650Sstevel@tonic-gate seg_low->pages = seg->pages; 30660Sstevel@tonic-gate seg_low->epages = seg_low->pages + size_low; 30670Sstevel@tonic-gate seg_low->pages_base = seg->pages_base; 30680Sstevel@tonic-gate seg_low->pages_end = seg_low->pages_base + size_low; 30690Sstevel@tonic-gate seg_low->next = seg_mid; 30700Sstevel@tonic-gate } 30710Sstevel@tonic-gate if (size_high != 0) { 30720Sstevel@tonic-gate seg_high->pages = seg->epages - size_high; 30730Sstevel@tonic-gate seg_high->epages = seg_high->pages + size_high; 30740Sstevel@tonic-gate seg_high->pages_base = seg->pages_end - size_high; 30750Sstevel@tonic-gate seg_high->pages_end = seg_high->pages_base + size_high; 30760Sstevel@tonic-gate seg_high->next = seg->next; 30770Sstevel@tonic-gate } 30780Sstevel@tonic-gate 30790Sstevel@tonic-gate seg_mid->pages = seg->pages + size_low; 30800Sstevel@tonic-gate seg_mid->pages_base = seg->pages_base + size_low; 30810Sstevel@tonic-gate seg_mid->epages = seg->epages - size_high; 30820Sstevel@tonic-gate seg_mid->pages_end = seg->pages_end - size_high; 30830Sstevel@tonic-gate seg_mid->next = (seg_high != NULL) ? seg_high : seg->next; 30840Sstevel@tonic-gate 30850Sstevel@tonic-gate /* 30860Sstevel@tonic-gate * Update hat_kpm specific info of all involved memsegs and 30870Sstevel@tonic-gate * allow hat_kpm specific global chain updates. 30880Sstevel@tonic-gate */ 30890Sstevel@tonic-gate hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high); 30900Sstevel@tonic-gate 30910Sstevel@tonic-gate /* 30920Sstevel@tonic-gate * At this point we have two equivalent memseg sub-chains, 30930Sstevel@tonic-gate * seg and seg_low/seg_mid/seg_high, which both chain on to 30940Sstevel@tonic-gate * the same place in the global chain. By re-writing the pointer 30950Sstevel@tonic-gate * in the previous element we switch atomically from using the old 30960Sstevel@tonic-gate * (seg) to the new. 30970Sstevel@tonic-gate */ 30980Sstevel@tonic-gate *segpp = (seg_low != NULL) ? seg_low : seg_mid; 30990Sstevel@tonic-gate 31000Sstevel@tonic-gate membar_enter(); 31010Sstevel@tonic-gate 31020Sstevel@tonic-gate build_pfn_hash(); 31030Sstevel@tonic-gate memsegs_unlock(1); 31040Sstevel@tonic-gate 31050Sstevel@tonic-gate /* 31060Sstevel@tonic-gate * We leave the old segment, 'seg', intact as there may be 31070Sstevel@tonic-gate * references to it. Also, as the value of total_pages has not 31080Sstevel@tonic-gate * changed and the memsegs list is effectively the same when 31090Sstevel@tonic-gate * accessed via the old or the new pointer, we do not have to 31100Sstevel@tonic-gate * cause pageout_scanner() to re-evaluate its hand pointers. 31110Sstevel@tonic-gate * 31120Sstevel@tonic-gate * We currently do not re-use or reclaim the page_t memory. 31130Sstevel@tonic-gate * If we do, then this may have to change. 31140Sstevel@tonic-gate */ 31150Sstevel@tonic-gate 31160Sstevel@tonic-gate mutex_enter(&memseg_lists_lock); 31170Sstevel@tonic-gate seg->lnext = memseg_edit_junk; 31180Sstevel@tonic-gate memseg_edit_junk = seg; 31190Sstevel@tonic-gate mutex_exit(&memseg_lists_lock); 31200Sstevel@tonic-gate 31210Sstevel@tonic-gate return (1); 31220Sstevel@tonic-gate } 31230Sstevel@tonic-gate 31240Sstevel@tonic-gate /* 31250Sstevel@tonic-gate * The memsegs lock is only taken when modifying the memsegs list 31260Sstevel@tonic-gate * and rebuilding the pfn hash table (after boot). 31270Sstevel@tonic-gate * No lock is needed for read as memseg structure are never de-allocated 31280Sstevel@tonic-gate * and the pointer linkage is never updated until the memseg is ready. 31290Sstevel@tonic-gate */ 31300Sstevel@tonic-gate krwlock_t memsegslock; 31310Sstevel@tonic-gate 31320Sstevel@tonic-gate void 31330Sstevel@tonic-gate memsegs_lock(int writer) 31340Sstevel@tonic-gate { 31350Sstevel@tonic-gate rw_enter(&memsegslock, writer ? RW_WRITER : RW_READER); 31360Sstevel@tonic-gate } 31370Sstevel@tonic-gate 31380Sstevel@tonic-gate /*ARGSUSED*/ 31390Sstevel@tonic-gate void 31400Sstevel@tonic-gate memsegs_unlock(int writer) 31410Sstevel@tonic-gate { 31420Sstevel@tonic-gate rw_exit(&memsegslock); 31430Sstevel@tonic-gate } 31440Sstevel@tonic-gate 31450Sstevel@tonic-gate /* 31460Sstevel@tonic-gate * memlist (phys_install, phys_avail) locking. 31470Sstevel@tonic-gate */ 31480Sstevel@tonic-gate 31490Sstevel@tonic-gate /* 31500Sstevel@tonic-gate * A read/write lock might be better here. 31510Sstevel@tonic-gate */ 31520Sstevel@tonic-gate static kmutex_t memlists_mutex; 31530Sstevel@tonic-gate 31540Sstevel@tonic-gate void 31550Sstevel@tonic-gate memlist_read_lock() 31560Sstevel@tonic-gate { 31570Sstevel@tonic-gate mutex_enter(&memlists_mutex); 31580Sstevel@tonic-gate } 31590Sstevel@tonic-gate 31600Sstevel@tonic-gate void 31610Sstevel@tonic-gate memlist_read_unlock() 31620Sstevel@tonic-gate { 31630Sstevel@tonic-gate mutex_exit(&memlists_mutex); 31640Sstevel@tonic-gate } 31650Sstevel@tonic-gate 31660Sstevel@tonic-gate void 31670Sstevel@tonic-gate memlist_write_lock() 31680Sstevel@tonic-gate { 31690Sstevel@tonic-gate mutex_enter(&memlists_mutex); 31700Sstevel@tonic-gate } 31710Sstevel@tonic-gate 31720Sstevel@tonic-gate void 31730Sstevel@tonic-gate memlist_write_unlock() 31740Sstevel@tonic-gate { 31750Sstevel@tonic-gate mutex_exit(&memlists_mutex); 31760Sstevel@tonic-gate } 31770Sstevel@tonic-gate 31780Sstevel@tonic-gate /* 31790Sstevel@tonic-gate * The sfmmu hat layer (e.g.) accesses some parts of the memseg 31800Sstevel@tonic-gate * structure using physical addresses. Therefore a kmem_cache is 31810Sstevel@tonic-gate * used with KMC_NOHASH to avoid page crossings within a memseg 31820Sstevel@tonic-gate * structure. KMC_NOHASH requires that no external (outside of 31830Sstevel@tonic-gate * slab) information is allowed. This, in turn, implies that the 31840Sstevel@tonic-gate * cache's slabsize must be exactly a single page, since per-slab 31850Sstevel@tonic-gate * information (e.g. the freelist for the slab) is kept at the 31860Sstevel@tonic-gate * end of the slab, where it is easy to locate. Should be changed 31870Sstevel@tonic-gate * when a more obvious kmem_cache interface/flag will become 31880Sstevel@tonic-gate * available. 31890Sstevel@tonic-gate */ 31900Sstevel@tonic-gate void 31910Sstevel@tonic-gate mem_config_init() 31920Sstevel@tonic-gate { 31930Sstevel@tonic-gate memseg_cache = kmem_cache_create("memseg_cache", sizeof (struct memseg), 31940Sstevel@tonic-gate 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH); 31950Sstevel@tonic-gate } 3196