/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vmem.h>
#include <sys/kmem.h>
#include <sys/systm.h>
#include <sys/machsystm.h>	/* for page_freelist_coalesce() */
#include <sys/errno.h>
#include <sys/memnode.h>
#include <sys/memlist.h>
#include <sys/memlist_impl.h>
#include <sys/tuneable.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/debug.h>
#include <sys/vm.h>
#include <sys/callb.h>
#include <sys/memlist_plat.h>	/* for installed_top_size() */
#include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
#include <sys/dumphdr.h>	/* for dump_resize() */
#include <sys/atomic.h>		/* for use in stats collection */
#include <sys/rwlock.h>
#include <sys/cpuvar.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/page.h>
#include <vm/vm_dep.h>
#define	SUNDDI_IMPL		/* so sunddi.h will not redefine splx() et al */
#include <sys/sunddi.h>
#include <sys/mem_config.h>
#include <sys/mem_cage.h>
#include <sys/lgrp.h>
#include <sys/ddi.h>
#include <sys/modctl.h>

extern struct memlist *phys_avail;

extern void mem_node_add(pfn_t, pfn_t);
extern void mem_node_del(pfn_t, pfn_t);

extern uint_t page_ctrs_adjust(int);
void page_ctrs_cleanup(void);
static void kphysm_setup_post_add(pgcnt_t);
static int kphysm_setup_pre_del(pgcnt_t);
static void kphysm_setup_post_del(pgcnt_t, int);

static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs);

static int delspan_reserve(pfn_t, pgcnt_t);
static void delspan_unreserve(pfn_t, pgcnt_t);

kmutex_t memseg_lists_lock;
struct memseg *memseg_va_avail;
struct memseg *memseg_alloc(void);
static struct memseg *memseg_delete_junk;
static struct memseg *memseg_edit_junk;
void memseg_remap_init(void);
static void memseg_remap_to_dummy(struct memseg *);
static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t);
static struct memseg *memseg_reuse(pgcnt_t);

static struct kmem_cache *memseg_cache;

/*
 * Interfaces to manage externally allocated
 * page_t memory (metadata) for a memseg.
 */
#pragma weak memseg_alloc_meta
#pragma weak memseg_free_meta
#pragma weak memseg_get_metapfn
#pragma weak memseg_remap_meta

extern int ppvm_enable;
extern page_t *ppvm_base;
extern int memseg_alloc_meta(pfn_t, pgcnt_t, void **, pgcnt_t *);
extern void memseg_free_meta(void *, pgcnt_t);
extern pfn_t memseg_get_metapfn(void *, pgcnt_t);
extern void memseg_remap_meta(struct memseg *);
static int memseg_is_dynamic(struct memseg *);
static int memseg_includes_meta(struct memseg *);
pfn_t memseg_get_start(struct memseg *);
static void memseg_cpu_vm_flush(void);

int meta_alloc_enable;

/*
 * Add a chunk of memory to the system.
 * base: starting PAGESIZE page of new memory.
 * npgs: length in PAGESIZE pages.
 *
 * Adding mem this way doesn't increase the size of the hash tables;
 * growing them would be too hard.  This should be OK, but adding memory
 * dynamically most likely means more hash misses, since the tables will
 * be smaller than they otherwise would be.
 */
#ifdef	DEBUG
static int memseg_debug;
#define	MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
#else
#define	MEMSEG_DEBUG(...)
#endif
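
/*
 * Overview of kphysm_add_memory_dynamic() below (a summary of the code,
 * added for orientation):
 *   1. Reserve the span against concurrent deletes (delspan_reserve).
 *   2. Interlock with other adds by extending phys_install.
 *   3. Obtain page_t metadata, either from existing memory
 *      (memseg_alloc_meta) or carved from the head of the new span.
 *   4. Map and probe the metadata pages, then update the memory node,
 *      page counters and phys_avail.
 *   5. Build (or re-use) a memseg, publish it at the head of the
 *      memsegs list, free the new pages and update the global counts.
 */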

int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
	page_t *pp;
	page_t *opp, *oepp, *segpp;
	struct memseg *seg;
	uint64_t avmem;
	pfn_t pfn;
	pfn_t pt_base = base;
	pgcnt_t tpgs = npgs;
	pgcnt_t metapgs = 0;
	int exhausted;
	pfn_t pnum;
	int mnode;
	caddr_t vaddr;
	int reuse;
	int mlret;
	int rv;
	int flags;
	int meta_alloc = 0;
	void *mapva;
	void *metabase = (void *)base;
	pgcnt_t nkpmpgs = 0;
	offset_t kpm_pages_off;

	cmn_err(CE_CONT,
	    "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n",
	    npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT);

	/*
	 * Add this span in the delete list to prevent interactions.
	 */
	if (!delspan_reserve(base, npgs)) {
		return (KPHYSM_ESPAN);
	}
	/*
	 * Check to see if any of the memory span has been added
	 * by trying an add to the installed memory list. This
	 * forms the interlocking process for add.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	if (mlret == MEML_SPANOP_OK)
		installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();

	if (mlret != MEML_SPANOP_OK) {
		if (mlret == MEML_SPANOP_EALLOC) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		} else if (mlret == MEML_SPANOP_ESPAN) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ESPAN);
		} else {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		}
	}

	if (meta_alloc_enable) {
		/*
		 * Allocate the page_t's from existing memory;
		 * if that fails, allocate from the incoming memory.
		 */
		rv = memseg_alloc_meta(base, npgs, &metabase, &metapgs);
		if (rv == KPHYSM_OK) {
			ASSERT(metapgs);
			ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);
			meta_alloc = 1;
			goto mapalloc;
		}
	}

	/*
	 * We store the page_t's for this new memory in the first
	 * few pages of the chunk. Here, we go and get'em ...
	 */

	/*
	 * The expression after the '-' gives the number of pages
	 * that will fit in the new memory based on a requirement
	 * of (PAGESIZE + sizeof (page_t)) bytes per page.
	 */
	metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) /
	    (PAGESIZE + sizeof (page_t)));

	npgs -= metapgs;
	base += metapgs;

	ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);

	exhausted = (metapgs == 0 || npgs == 0);

	if (kpm_enable && !exhausted) {
		pgcnt_t start, end, nkpmpgs_prelim;
		size_t ptsz;

		/*
		 * A viable kpm large page mapping must not overlap two
		 * dynamic memsegs. Therefore the total size is checked
		 * to be at least kpm_pgsz and also whether start and end
		 * points are at least kpm_pgsz aligned.
		 */
		if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) ||
		    pmodkpmp(base + npgs)) {

			kphysm_addmem_error_undospan(pt_base, tpgs);

			/*
			 * There is no specific error code for violating
			 * kpm granularity constraints.
			 */
			return (KPHYSM_ENOTVIABLE);
		}

		start = kpmptop(ptokpmp(base));
		end = kpmptop(ptokpmp(base + npgs));
		nkpmpgs_prelim = ptokpmp(end - start);
		ptsz = npgs * sizeof (page_t);
		metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ);
		exhausted = (tpgs <= metapgs);
		if (!exhausted) {
			npgs = tpgs - metapgs;
			base = pt_base + metapgs;

			/* final nkpmpgs */
			start = kpmptop(ptokpmp(base));
			nkpmpgs = ptokpmp(end - start);
			kpm_pages_off = ptsz +
			    (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ;
		}
	}
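
	/*
	 * Worked example of the metadata split above, with illustrative
	 * numbers only (actual sizes are platform dependent): with
	 * PAGESIZE = 8192 and sizeof (page_t) = 128, an incoming span of
	 * npgs = 65536 gives
	 *	metapgs = 65536 - (65536 * 8192) / (8192 + 128) = 1009
	 * so 1009 pages hold the page_t's for the remaining 64527 usable
	 * pages.  When kpm is enabled the calculation is redone above to
	 * make room for the kpm_page structures as well.
	 */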

	/*
	 * Is the memory area supplied too small?
	 */
	if (exhausted) {
		kphysm_addmem_error_undospan(pt_base, tpgs);
		/*
		 * There is no specific error code for 'too small'.
		 */
		return (KPHYSM_ERESOURCE);
	}

mapalloc:
	/*
	 * We may re-use a previously allocated VA space for the page_ts
	 * eventually, but we need to initialize and lock the pages first.
	 */

	/*
	 * Get an address in the kernel address map, map
	 * the page_t pages and see if we can touch them.
	 */

	mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP);
	if (mapva == NULL) {
		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't allocate VA for page_ts");

		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}
	pp = mapva;

	if (physmax < (pt_base + tpgs))
		physmax = (pt_base + tpgs);

	/*
	 * In the remapping code we map one page at a time so we must do
	 * the same here to match mapping sizes.
	 */
	pfn = pt_base;
	vaddr = (caddr_t)pp;
	for (pnum = 0; pnum < metapgs; pnum++) {
		if (meta_alloc)
			pfn = memseg_get_metapfn(metabase, (pgcnt_t)pnum);
		hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
		pfn++;
		vaddr += ptob(1);
	}

	if (ddi_peek32((dev_info_t *)NULL,
	    (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) {

		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't access pp array at 0x%p [phys 0x%lx]",
		    (void *)pp, pt_base);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_EFAULT);
	}

	/*
	 * Add this memory slice to its memory node translation.
	 *
	 * Note that right now, each node may have only one slice;
	 * this may change with COD or in larger SSM systems with
	 * nested latency groups, so we must not assume that the
	 * node does not yet exist.
	 */
	pnum = pt_base + tpgs - 1;
	mem_node_add_range(pt_base, pnum);

	/*
	 * Allocate or resize page counters as necessary to accommodate
	 * the increase in memory pages.
	 */
	mnode = PFN_2_MEM_NODE(pnum);
	PAGE_CTRS_ADJUST(base, npgs, rv);
	if (rv) {

		mem_node_del_range(pt_base, pnum);

		/* cleanup the page counters */
		page_ctrs_cleanup();

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}

	/*
	 * Update the phys_avail memory list.
	 * The phys_install list was done at the start.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT,
	    (uint64_t)(npgs) << PAGESHIFT, &phys_avail);
	ASSERT(mlret == MEML_SPANOP_OK);

	memlist_write_unlock();

	/* See if we can find a memseg to re-use. */
	if (meta_alloc) {
		seg = memseg_reuse(0);
		reuse = 1;	/* force unmapping of temp mapva */
		flags = MEMSEG_DYNAMIC | MEMSEG_META_ALLOC;
		/*
		 * There is a 1:1 fixed relationship between a pfn
		 * and a page_t VA. The pfn is used as an index into
		 * the ppvm_base page_t table in order to calculate
		 * the page_t base address for a given pfn range.
		 */
		segpp = ppvm_base + base;
	} else {
		seg = memseg_reuse(metapgs);
		reuse = (seg != NULL);
		flags = MEMSEG_DYNAMIC | MEMSEG_META_INCL;
		segpp = pp;
	}
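
	/*
	 * Note (illustrative): in the MEMSEG_META_ALLOC case the page_t
	 * for pfn p always lives at &ppvm_base[p], so the segpp
	 * assignment in the meta_alloc branch above is plain array
	 * indexing; it points at the page_t slot for the first pfn of
	 * the new span.
	 */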

	/*
	 * Initialize the memseg structure representing this memory
	 * and add it to the existing list of memsegs. Do some basic
	 * initialization and add the memory to the system.
	 * In order to prevent lock deadlocks, the add_physmem()
	 * code is repeated here, but split into several stages.
	 *
	 * If a memseg is reused, invalidate memseg pointers in
	 * all cpu vm caches.  We need to do this since the check
	 *	pp >= seg->pages && pp < seg->epages
	 * used in various places is not atomic and so the first compare
	 * can happen before reuse and the second compare after reuse.
	 * The invalidation ensures that a memseg is not dereferenced
	 * while its page/pfn pointers are changing.
	 */
	if (seg == NULL) {
		seg = memseg_alloc();
		ASSERT(seg != NULL);
		seg->msegflags = flags;
		MEMSEG_DEBUG("memseg_get: alloc seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		seg->pages = segpp;
	} else {
		ASSERT(seg->msegflags == flags);
		ASSERT(seg->pages_base == seg->pages_end);
		MEMSEG_DEBUG("memseg_get: reuse seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		if (meta_alloc) {
			memseg_cpu_vm_flush();
			seg->pages = segpp;
		}
	}

	seg->epages = seg->pages + npgs;
	seg->pages_base = base;
	seg->pages_end = base + npgs;

	/*
	 * Initialize metadata. The page_ts are set to locked state
	 * ready to be freed.
	 */
	bzero((caddr_t)pp, ptob(metapgs));

	pfn = seg->pages_base;
	/* Save the original pp base in case we reuse a memseg. */
	opp = pp;
	oepp = opp + npgs;
	for (pp = opp; pp < oepp; pp++) {
		pp->p_pagenum = pfn;
		pfn++;
		page_iolock_init(pp);
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		pp->p_offset = (u_offset_t)-1;
	}

	if (reuse) {
		/* Remap our page_ts to the re-used memseg VA space. */
		pfn = pt_base;
		vaddr = (caddr_t)seg->pages;
		for (pnum = 0; pnum < metapgs; pnum++) {
			if (meta_alloc)
				pfn = memseg_get_metapfn(metabase,
				    (pgcnt_t)pnum);
			hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST);
			pfn++;
			vaddr += ptob(1);
		}

		hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
	}

	hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off);

	memsegs_lock(1);

	/*
	 * The new memseg is inserted at the beginning of the list.
	 * Not only does this save searching for the tail, but in the
	 * case of a re-used memseg, it solves the problem of what
	 * happens if some process has still got a pointer to the
	 * memseg and follows the next pointer to continue traversing
	 * the memsegs list.
	 */

	hat_kpm_addmem_mseg_insert(seg);

	seg->next = memsegs;
	membar_producer();
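
	/*
	 * The membar_producer() above orders the initialization of seg
	 * (including seg->next) before the stores that publish it below,
	 * so a reader that traverses the memsegs list without taking
	 * memsegs_lock never sees a partially initialized memseg.
	 */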

	hat_kpm_addmem_memsegs_update(seg);

	memsegs = seg;

	build_pfn_hash();

	total_pages += npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock(1);

	memsegs_unlock(1);

	PLCNT_MODIFY_MAX(seg->pages_base, (long)npgs);

	/*
	 * Free the pages outside the lock to avoid locking loops.
	 */
	for (pp = seg->pages; pp < seg->epages; pp++) {
		page_free(pp, 1);
	}

	/*
	 * Now that we've updated the appropriate memory lists we
	 * need to reset a number of globals, since we've increased memory.
	 * Several have already been updated for us as noted above. The
	 * globals we're interested in at this point are:
	 *	physmax - highest page frame number.
	 *	physinstalled - number of pages currently installed
	 *	    (done earlier)
	 *	maxmem - max free pages in the system
	 *	physmem - physical memory pages available
	 *	availrmem - real memory available
	 */

	mutex_enter(&freemem_lock);
	maxmem += npgs;
	physmem += npgs;
	availrmem += npgs;
	availrmem_initial += npgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	page_freelist_coalesce_all(mnode);

	kphysm_setup_post_add(npgs);

	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	delspan_unreserve(pt_base, tpgs);
	return (KPHYSM_OK);		/* Successfully added system memory */
}

/*
 * There are various error conditions in kphysm_add_memory_dynamic()
 * which require a rollback of already changed global state.
 */
static void
kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs)
{
	int mlret;

	/* Unreserve memory span. */
	memlist_write_lock();

	mlret = memlist_delete_span(
	    (uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	ASSERT(mlret == MEML_SPANOP_OK);
	phys_install_has_changed();
	installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();
	delspan_unreserve(pt_base, tpgs);
}

/*
 * Only return an available memseg of exactly the right size
 * if size is required.
 * When the meta data area has its own virtual address space
 * we will need to manage this more carefully and do best fit
 * allocations, possibly splitting an available area.
 */
struct memseg *
memseg_reuse(pgcnt_t metapgs)
{
	int type;
	struct memseg **segpp, *seg;

	mutex_enter(&memseg_lists_lock);

	segpp = &memseg_va_avail;
	for (; (seg = *segpp) != NULL; segpp = &seg->lnext) {
		caddr_t end;

		/*
		 * Make sure we are reusing the right segment type.
		 */
		type = metapgs ? MEMSEG_META_INCL : MEMSEG_META_ALLOC;

		if ((seg->msegflags & (MEMSEG_META_INCL | MEMSEG_META_ALLOC))
		    != type)
			continue;

		if (kpm_enable)
			end = hat_kpm_mseg_reuse(seg);
		else
			end = (caddr_t)seg->epages;

		/*
		 * Check for the right size if it is provided.
		 */
		if (!metapgs || btopr(end - (caddr_t)seg->pages) == metapgs) {
			*segpp = seg->lnext;
			seg->lnext = NULL;
			break;
		}
	}
	mutex_exit(&memseg_lists_lock);

	return (seg);
}

static uint_t handle_gen;

struct memdelspan {
	struct memdelspan *mds_next;
	pfn_t		mds_base;
	pgcnt_t		mds_npgs;
	uint_t		*mds_bitmap;
	uint_t		*mds_bitmap_retired;
};

#define	NBPBMW		(sizeof (uint_t) * NBBY)
#define	MDS_BITMAPBYTES(MDSP) \
	((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t))
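
/*
 * Example of the bitmap sizing above (assuming a 32-bit uint_t, so
 * NBPBMW == 32): a span with mds_npgs == 100 needs
 * (100 + 31) / 32 == 4 words, i.e. MDS_BITMAPBYTES() == 16 bytes,
 * one bit per page with the last word only partially used.
 */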

struct transit_list {
	struct transit_list	*trl_next;
	struct memdelspan	*trl_spans;
	int			trl_collect;
};

struct transit_list_head {
	kmutex_t		trh_lock;
	struct transit_list	*trh_head;
};

static struct transit_list_head transit_list_head;

struct mem_handle;
static void transit_list_collect(struct mem_handle *, int);
static void transit_list_insert(struct transit_list *);
static void transit_list_remove(struct transit_list *);

#ifdef DEBUG
#define	MEM_DEL_STATS
#endif /* DEBUG */

#ifdef MEM_DEL_STATS
static int mem_del_stat_print = 0;
struct mem_del_stat {
	uint_t	nloop;
	uint_t	need_free;
	uint_t	free_loop;
	uint_t	free_low;
	uint_t	free_failed;
	uint_t	ncheck;
	uint_t	nopaget;
	uint_t	lockfail;
	uint_t	nfree;
	uint_t	nreloc;
	uint_t	nrelocfail;
	uint_t	already_done;
	uint_t	first_notfree;
	uint_t	npplocked;
	uint_t	nlockreloc;
	uint_t	nnorepl;
	uint_t	nmodreloc;
	uint_t	ndestroy;
	uint_t	nputpage;
	uint_t	nnoreclaim;
	uint_t	ndelay;
	uint_t	demotefail;
	uint64_t nticks_total;
	uint64_t nticks_pgrp;
	uint_t	retired;
	uint_t	toxic;
	uint_t	failing;
	uint_t	modtoxic;
	uint_t	npplkdtoxic;
	uint_t	gptlmodfail;
	uint_t	gptllckfail;
};
/*
 * The stat values are only incremented in the delete thread
 * so no locking or atomic ops are required.
 */
#define	MDSTAT_INCR(MHP, FLD)	(MHP)->mh_delstat.FLD++
#define	MDSTAT_TOTAL(MHP, ntck) ((MHP)->mh_delstat.nticks_total += (ntck))
#define	MDSTAT_PGRP(MHP, ntck)	((MHP)->mh_delstat.nticks_pgrp += (ntck))
static void mem_del_stat_print_func(struct mem_handle *);
#define	MDSTAT_PRINT(MHP)	mem_del_stat_print_func((MHP))
#else /* MEM_DEL_STATS */
#define	MDSTAT_INCR(MHP, FLD)
#define	MDSTAT_TOTAL(MHP, ntck)
#define	MDSTAT_PGRP(MHP, ntck)
#define	MDSTAT_PRINT(MHP)
#endif /* MEM_DEL_STATS */

typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING,
	MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t;
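
/*
 * Apparent handle life cycle, inferred from the state names and the
 * checks in the code: kphysm_del_gethandle() takes a handle from
 * MHND_FREE to MHND_INIT; spans may only be added while in MHND_INIT;
 * a started delete advances through MHND_STARTING and MHND_RUNNING to
 * MHND_DONE; releasing the handle returns it to MHND_FREE, at which
 * point kphysm_free_mem_handle() may reclaim it.
 */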

/*
 * mh_mutex must be taken to examine or change mh_exthandle and mh_state.
 * The mutex may not be required for other fields, dependent on mh_state.
 */
struct mem_handle {
	kmutex_t	mh_mutex;
	struct mem_handle *mh_next;
	memhandle_t	mh_exthandle;
	mhnd_state_t	mh_state;
	struct transit_list mh_transit;
	pgcnt_t		mh_phys_pages;
	pgcnt_t		mh_vm_pages;
	pgcnt_t		mh_hold_todo;
	void		(*mh_delete_complete)(void *, int error);
	void		*mh_delete_complete_arg;
	volatile uint_t mh_cancel;
	volatile uint_t mh_dr_aio_cleanup_cancel;
	volatile uint_t mh_aio_cleanup_done;
	kcondvar_t	mh_cv;
	kthread_id_t	mh_thread_id;
	page_t		*mh_deleted;	/* link through p_next */
#ifdef MEM_DEL_STATS
	struct mem_del_stat mh_delstat;
#endif /* MEM_DEL_STATS */
};

static struct mem_handle *mem_handle_head;
static kmutex_t mem_handle_list_mutex;

static struct mem_handle *
kphysm_allocate_mem_handle()
{
	struct mem_handle *mhp;

	mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP);
	mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&mem_handle_list_mutex);
	mutex_enter(&mhp->mh_mutex);
	/* handle_gen is protected by list mutex. */
	mhp->mh_exthandle = (memhandle_t)(uintptr_t)(++handle_gen);
	mhp->mh_next = mem_handle_head;
	mem_handle_head = mhp;
	mutex_exit(&mem_handle_list_mutex);

	return (mhp);
}

static void
kphysm_free_mem_handle(struct mem_handle *mhp)
{
	struct mem_handle **mhpp;

	ASSERT(mutex_owned(&mhp->mh_mutex));
	ASSERT(mhp->mh_state == MHND_FREE);
	/*
	 * Exit the mutex to preserve locking order. This is OK
	 * here as once in the FREE state, the handle cannot
	 * be found by a lookup.
	 */
	mutex_exit(&mhp->mh_mutex);

	mutex_enter(&mem_handle_list_mutex);
	mhpp = &mem_handle_head;
	while (*mhpp != NULL && *mhpp != mhp)
		mhpp = &(*mhpp)->mh_next;
	ASSERT(*mhpp == mhp);
	/*
	 * No need to lock the handle (mh_mutex) as only
	 * mh_next is changing and this is the only thread that
	 * can be referencing mhp.
	 */
	*mhpp = mhp->mh_next;
	mutex_exit(&mem_handle_list_mutex);

	mutex_destroy(&mhp->mh_mutex);
	kmem_free(mhp, sizeof (struct mem_handle));
}

/*
 * This function finds the internal mem_handle corresponding to an
 * external handle and returns it with the mh_mutex held.
 */
static struct mem_handle *
kphysm_lookup_mem_handle(memhandle_t handle)
{
	struct mem_handle *mhp;

	mutex_enter(&mem_handle_list_mutex);
	for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) {
		if (mhp->mh_exthandle == handle) {
			mutex_enter(&mhp->mh_mutex);
			/*
			 * The state of the handle could have been changed
			 * by kphysm_del_release() while waiting for mh_mutex.
			 */
			if (mhp->mh_state == MHND_FREE) {
				mutex_exit(&mhp->mh_mutex);
				continue;
			}
			break;
		}
	}
	mutex_exit(&mem_handle_list_mutex);
	return (mhp);
}

int
kphysm_del_gethandle(memhandle_t *xmhp)
{
	struct mem_handle *mhp;

	mhp = kphysm_allocate_mem_handle();
	/*
	 * The handle is allocated using KM_SLEEP, so cannot fail.
	 * If the implementation is changed, the correct error to return
	 * here would be KPHYSM_ENOHANDLES.
	 */
	ASSERT(mhp->mh_state == MHND_FREE);
	mhp->mh_state = MHND_INIT;
	*xmhp = mhp->mh_exthandle;
	mutex_exit(&mhp->mh_mutex);
	return (KPHYSM_OK);
}
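
/*
 * Do two pfn spans overlap?  Each span is treated as the half-open
 * interval [b, b + l).  For example, spans (0x100, 0x100) and
 * (0x200, 0x100) abut exactly and do not overlap, so the call
 * overlapping(0x100, 0x100, 0x200, 0x100) returns 0.
 */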
static int
overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2)
{
	pfn_t e1, e2;

	e1 = b1 + l1;
	e2 = b2 + l2;

	return (!(b2 >= e1 || b1 >= e2));
}

static int can_remove_pgs(pgcnt_t);

static struct memdelspan *
span_to_install(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	uint64_t address, size, thislen;
	struct memlist *mlp;

	mdsp_new = NULL;

	address = (uint64_t)base << PAGESHIFT;
	size = (uint64_t)npgs << PAGESHIFT;
	while (size != 0) {
		memlist_read_lock();
		for (mlp = phys_install; mlp != NULL; mlp = mlp->ml_next) {
			if (address >= (mlp->ml_address + mlp->ml_size))
				continue;
			if ((address + size) > mlp->ml_address)
				break;
		}
		if (mlp == NULL) {
			address += size;
			size = 0;
			thislen = 0;
		} else {
			if (address < mlp->ml_address) {
				size -= (mlp->ml_address - address);
				address = mlp->ml_address;
			}
			ASSERT(address >= mlp->ml_address);
			if ((address + size) >
			    (mlp->ml_address + mlp->ml_size)) {
				thislen =
				    mlp->ml_size - (address - mlp->ml_address);
			} else {
				thislen = size;
			}
		}
		memlist_read_unlock();
		/* TODO: phys_install could change now */
		if (thislen == 0)
			continue;
		mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
		mdsp->mds_base = btop(address);
		mdsp->mds_npgs = btop(thislen);
		mdsp->mds_next = mdsp_new;
		mdsp_new = mdsp;
		address += thislen;
		size -= thislen;
	}
	return (mdsp_new);
}

static void
free_delspans(struct memdelspan *mdsp)
{
	struct memdelspan *amdsp;

	while ((amdsp = mdsp) != NULL) {
		mdsp = amdsp->mds_next;
		kmem_free(amdsp, sizeof (struct memdelspan));
	}
}

/*
 * Concatenate lists. No list ordering is required.
 */

static void
delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp)
{
	while (*mdspp != NULL)
		mdspp = &(*mdspp)->mds_next;

	*mdspp = mdsp;
}

/*
 * Given a new list of delspans, check there is no overlap with
 * all existing span activity (add or delete) and then concatenate
 * the new spans to the given list.
 * Return 1 for OK, 0 if overlapping.
 */
static int
delspan_insert(
	struct transit_list *my_tlp,
	struct memdelspan *mdsp_new)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);
	ASSERT(mdsp_new != NULL);

	ret = 1;
	mutex_enter(&trh->trh_lock);
	/* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */
	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			struct memdelspan *nmdsp;

			for (nmdsp = mdsp_new; nmdsp != NULL;
			    nmdsp = nmdsp->mds_next) {
				if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
				    nmdsp->mds_base, nmdsp->mds_npgs)) {
					ret = 0;
					goto done;
				}
			}
		}
	}
done:
	if (ret != 0) {
		if (my_tlp->trl_spans == NULL)
			transit_list_insert(my_tlp);
		delspan_concat(&my_tlp->trl_spans, mdsp_new);
	}
	mutex_exit(&trh->trh_lock);
	return (ret);
}
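
/*
 * Remove spans from my_tlp.  An npgs of 0 acts as a wildcard: every
 * span on the list is dropped; otherwise only spans falling entirely
 * inside [base, base + npgs) are freed.
 */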
static void
delspan_remove(
	struct transit_list *my_tlp,
	pfn_t base,
	pgcnt_t npgs)
{
	struct transit_list_head *trh;
	struct memdelspan *mdsp;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);

	mutex_enter(&trh->trh_lock);
	if ((mdsp = my_tlp->trl_spans) != NULL) {
		if (npgs == 0) {
			my_tlp->trl_spans = NULL;
			free_delspans(mdsp);
			transit_list_remove(my_tlp);
		} else {
			struct memdelspan **prv;

			prv = &my_tlp->trl_spans;
			while (mdsp != NULL) {
				pfn_t p_end;

				p_end = mdsp->mds_base + mdsp->mds_npgs;
				if (mdsp->mds_base >= base &&
				    p_end <= (base + npgs)) {
					*prv = mdsp->mds_next;
					mdsp->mds_next = NULL;
					free_delspans(mdsp);
				} else {
					prv = &mdsp->mds_next;
				}
				mdsp = *prv;
			}
			if (my_tlp->trl_spans == NULL)
				transit_list_remove(my_tlp);
		}
	}
	mutex_exit(&trh->trh_lock);
}

/*
 * Reserve interface for add to stop delete before add finished.
 * This list is only accessed through the delspan_insert/remove
 * functions and so is fully protected by the mutex in struct transit_list.
 */

static struct transit_list reserve_transit;

static int
delspan_reserve(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	int ret;

	mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
	mdsp->mds_base = base;
	mdsp->mds_npgs = npgs;
	if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) {
		free_delspans(mdsp);
	}
	return (ret);
}

static void
delspan_unreserve(pfn_t base, pgcnt_t npgs)
{
	delspan_remove(&reserve_transit, base, npgs);
}
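
/*
 * Note on the interlock above: kphysm_add_memory_dynamic() parks its
 * span on reserve_transit for the duration of the add, so a concurrent
 * kphysm_del_span() on an overlapping range sees the conflict in
 * delspan_insert() and fails (KPHYSM_EBUSY) until the add completes.
 */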

/*
 * Return whether memseg was created by kphysm_add_memory_dynamic().
 */
static int
memseg_is_dynamic(struct memseg *seg)
{
	return (seg->msegflags & MEMSEG_DYNAMIC);
}

int
kphysm_del_span(
	memhandle_t handle,
	pfn_t base,
	pgcnt_t npgs)
{
	struct mem_handle *mhp;
	struct memseg *seg;
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	pgcnt_t phys_pages, vm_pages;
	pfn_t p_end;
	page_t *pp;
	int ret;

	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	if (mhp->mh_state != MHND_INIT) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	}

	/*
	 * Intersect the span with the installed memory list (phys_install).
	 */
	mdsp_new = span_to_install(base, npgs);
	if (mdsp_new == NULL) {
		/*
		 * No physical memory in this range. Is this an
		 * error? If an attempt to start the delete is made
		 * for OK returns from del_span such as this, start will
		 * return an error.
		 * Could return KPHYSM_ENOWORK.
		 */
		/*
		 * It is assumed that there are no error returns
		 * from span_to_install() due to kmem_alloc failure.
		 */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_OK);
	}
	/*
	 * Does this span overlap an existing span?
	 */
	if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) {
		/*
		 * Differentiate between already on list for this handle
		 * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY).
		 */
		ret = KPHYSM_EBUSY;
		for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
			    base, npgs)) {
				ret = KPHYSM_EDUP;
				break;
			}
		}
		mutex_exit(&mhp->mh_mutex);
		free_delspans(mdsp_new);
		return (ret);
	}
	/*
	 * At this point the spans in mdsp_new have been inserted into the
	 * list of spans for this handle and thereby to the global list of
	 * spans being processed. Each of these spans must now be checked
	 * for relocatability. As a side-effect, segments in the memseg
	 * list may be split.
	 *
	 * Note that mdsp_new can no longer be used as it is now part of
	 * a larger list. Select elements of this larger list based
	 * on base and npgs.
	 */
restart:
	phys_pages = 0;
	vm_pages = 0;
	ret = KPHYSM_OK;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		pgcnt_t pages_checked;

		if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) {
			continue;
		}
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		/*
		 * The pages_checked count is a hack. All pages should be
		 * checked for relocatability. Those not covered by memsegs
		 * should be tested with arch_kphysm_del_span_ok().
		 */
		pages_checked = 0;
		for (seg = memsegs; seg; seg = seg->next) {
			pfn_t mseg_start;

			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				continue;
			}
			mseg_start = memseg_get_start(seg);
			/* Check that segment is suitable for delete. */
			if (memseg_includes_meta(seg)) {
				/*
				 * Check that this segment is completely
				 * within the span.
				 */
				if (mseg_start < mdsp->mds_base ||
				    seg->pages_end > p_end) {
					ret = KPHYSM_EBUSY;
					break;
				}
				pages_checked += seg->pages_end - mseg_start;
			} else {
				/*
				 * If this segment is larger than the span,
				 * try to split it. After the split, it
				 * is necessary to restart.
				 */
				if (seg->pages_base < mdsp->mds_base ||
				    seg->pages_end > p_end) {
					pfn_t abase;
					pgcnt_t anpgs;
					int s_ret;

					/* Split required. */
					if (mdsp->mds_base < seg->pages_base)
						abase = seg->pages_base;
					else
						abase = mdsp->mds_base;
					if (p_end > seg->pages_end)
						anpgs = seg->pages_end - abase;
					else
						anpgs = p_end - abase;
					s_ret = kphysm_split_memseg(abase,
					    anpgs);
					if (s_ret == 0) {
*/ 12160Sstevel@tonic-gate ret = KPHYSM_ERESOURCE; 12170Sstevel@tonic-gate break; 12180Sstevel@tonic-gate } 12190Sstevel@tonic-gate goto restart; 12200Sstevel@tonic-gate } 12210Sstevel@tonic-gate pages_checked += 12220Sstevel@tonic-gate seg->pages_end - seg->pages_base; 12230Sstevel@tonic-gate } 12240Sstevel@tonic-gate /* 12250Sstevel@tonic-gate * The memseg is wholly within the delete span. 12260Sstevel@tonic-gate * The individual pages can now be checked. 12270Sstevel@tonic-gate */ 12280Sstevel@tonic-gate /* Cage test. */ 12290Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) { 12300Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 12310Sstevel@tonic-gate ret = KPHYSM_ENONRELOC; 12320Sstevel@tonic-gate break; 12330Sstevel@tonic-gate } 12340Sstevel@tonic-gate } 12350Sstevel@tonic-gate if (ret != KPHYSM_OK) { 12360Sstevel@tonic-gate break; 12370Sstevel@tonic-gate } 12380Sstevel@tonic-gate phys_pages += (seg->pages_end - mseg_start); 12390Sstevel@tonic-gate vm_pages += MSEG_NPAGES(seg); 12400Sstevel@tonic-gate } 12410Sstevel@tonic-gate if (ret != KPHYSM_OK) 12420Sstevel@tonic-gate break; 12430Sstevel@tonic-gate if (pages_checked != mdsp->mds_npgs) { 12440Sstevel@tonic-gate ret = KPHYSM_ENONRELOC; 12450Sstevel@tonic-gate break; 12460Sstevel@tonic-gate } 12470Sstevel@tonic-gate } 12480Sstevel@tonic-gate 12490Sstevel@tonic-gate if (ret == KPHYSM_OK) { 12500Sstevel@tonic-gate mhp->mh_phys_pages += phys_pages; 12510Sstevel@tonic-gate mhp->mh_vm_pages += vm_pages; 12520Sstevel@tonic-gate } else { 12530Sstevel@tonic-gate /* 12540Sstevel@tonic-gate * Keep holding the mh_mutex to prevent it going away. 12550Sstevel@tonic-gate */ 12560Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, base, npgs); 12570Sstevel@tonic-gate } 12580Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 12590Sstevel@tonic-gate return (ret); 12600Sstevel@tonic-gate } 12610Sstevel@tonic-gate 12620Sstevel@tonic-gate int 12630Sstevel@tonic-gate kphysm_del_span_query( 12640Sstevel@tonic-gate pfn_t base, 12650Sstevel@tonic-gate pgcnt_t npgs, 12660Sstevel@tonic-gate memquery_t *mqp) 12670Sstevel@tonic-gate { 12680Sstevel@tonic-gate struct memdelspan *mdsp; 12690Sstevel@tonic-gate struct memdelspan *mdsp_new; 12700Sstevel@tonic-gate int done_first_nonreloc; 12710Sstevel@tonic-gate 12720Sstevel@tonic-gate mqp->phys_pages = 0; 12730Sstevel@tonic-gate mqp->managed = 0; 12740Sstevel@tonic-gate mqp->nonrelocatable = 0; 12750Sstevel@tonic-gate mqp->first_nonrelocatable = 0; 12760Sstevel@tonic-gate mqp->last_nonrelocatable = 0; 12770Sstevel@tonic-gate 12780Sstevel@tonic-gate mdsp_new = span_to_install(base, npgs); 12790Sstevel@tonic-gate /* 12800Sstevel@tonic-gate * It is OK to proceed here if mdsp_new == NULL. 
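 * With mdsp_new == NULL the loop below simply never runs, all
 * of the counters stay zero and KPHYSM_OK is returned.
 *
 * A caller sketch (illustrative, error handling omitted):
 *
 *	memquery_t mq;
 *
 *	(void) kphysm_del_span_query(base, npgs, &mq);
 *	if (mq.nonrelocatable == 0)
 *		(void) kphysm_del_span(handle, base, npgs);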
12810Sstevel@tonic-gate */
12820Sstevel@tonic-gate 	done_first_nonreloc = 0;
12830Sstevel@tonic-gate 	for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) {
12840Sstevel@tonic-gate 		pfn_t sbase;
12850Sstevel@tonic-gate 		pgcnt_t snpgs;
12860Sstevel@tonic-gate
12870Sstevel@tonic-gate 		mqp->phys_pages += mdsp->mds_npgs;
12880Sstevel@tonic-gate 		sbase = mdsp->mds_base;
12890Sstevel@tonic-gate 		snpgs = mdsp->mds_npgs;
12900Sstevel@tonic-gate 		while (snpgs != 0) {
12910Sstevel@tonic-gate 			struct memseg *lseg, *seg;
12920Sstevel@tonic-gate 			pfn_t p_end;
12930Sstevel@tonic-gate 			page_t *pp;
12940Sstevel@tonic-gate 			pfn_t mseg_start;
12950Sstevel@tonic-gate
12960Sstevel@tonic-gate 			p_end = sbase + snpgs;
12970Sstevel@tonic-gate 			/*
12980Sstevel@tonic-gate 			 * Find the lowest addressed memseg that starts
12990Sstevel@tonic-gate 			 * at or after sbase and account for it.
13000Sstevel@tonic-gate 			 * This is to catch dynamic memsegs whose start
13010Sstevel@tonic-gate 			 * is hidden.
13020Sstevel@tonic-gate 			 */
13030Sstevel@tonic-gate 			seg = NULL;
13040Sstevel@tonic-gate 			for (lseg = memsegs; lseg != NULL; lseg = lseg->next) {
13050Sstevel@tonic-gate 				if ((lseg->pages_base >= sbase) ||
13060Sstevel@tonic-gate 				    (lseg->pages_base < p_end &&
13070Sstevel@tonic-gate 				    lseg->pages_end > sbase)) {
13080Sstevel@tonic-gate 					if (seg == NULL ||
13090Sstevel@tonic-gate 					    seg->pages_base > lseg->pages_base)
13100Sstevel@tonic-gate 						seg = lseg;
13110Sstevel@tonic-gate 				}
13120Sstevel@tonic-gate 			}
13130Sstevel@tonic-gate 			if (seg != NULL) {
131410106SJason.Beloro@Sun.COM 				mseg_start = memseg_get_start(seg);
13150Sstevel@tonic-gate 				/*
13160Sstevel@tonic-gate 				 * Now have the full extent of the memseg so
13170Sstevel@tonic-gate 				 * do the range check.
13180Sstevel@tonic-gate 				 */
13190Sstevel@tonic-gate 				if (mseg_start >= p_end ||
13200Sstevel@tonic-gate 				    seg->pages_end <= sbase) {
13210Sstevel@tonic-gate 					/* Span does not overlap memseg. */
13220Sstevel@tonic-gate 					seg = NULL;
13230Sstevel@tonic-gate 				}
13240Sstevel@tonic-gate 			}
13250Sstevel@tonic-gate 			/*
13260Sstevel@tonic-gate 			 * Account for the gap, either before the segment if
13270Sstevel@tonic-gate 			 * there is one or to the end of the span.
13280Sstevel@tonic-gate 			 */
13290Sstevel@tonic-gate 			if (seg == NULL || mseg_start > sbase) {
13300Sstevel@tonic-gate 				pfn_t a_end;
13310Sstevel@tonic-gate
13320Sstevel@tonic-gate 				a_end = (seg == NULL) ? p_end : mseg_start;
13330Sstevel@tonic-gate 				/*
13340Sstevel@tonic-gate 				 * Check with arch layer for relocatability.
13350Sstevel@tonic-gate 				 */
13360Sstevel@tonic-gate 				if (arch_kphysm_del_span_ok(sbase,
13370Sstevel@tonic-gate 				    (a_end - sbase))) {
13380Sstevel@tonic-gate 					/*
13390Sstevel@tonic-gate 					 * No non-relocatable pages in this
13400Sstevel@tonic-gate 					 * area, avoid the fine-grained
13410Sstevel@tonic-gate 					 * test.
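					 * If the bulk probe fails, sbase is
					 * left unchanged and the loop below
					 * retests one pfn at a time,
					 * recording the first and last
					 * non-relocatable pages in the
					 * result.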
13420Sstevel@tonic-gate */ 13430Sstevel@tonic-gate snpgs -= (a_end - sbase); 13440Sstevel@tonic-gate sbase = a_end; 13450Sstevel@tonic-gate } 13460Sstevel@tonic-gate while (sbase < a_end) { 13470Sstevel@tonic-gate if (!arch_kphysm_del_span_ok(sbase, 13480Sstevel@tonic-gate 1)) { 13490Sstevel@tonic-gate mqp->nonrelocatable++; 13500Sstevel@tonic-gate if (!done_first_nonreloc) { 13510Sstevel@tonic-gate mqp-> 13520Sstevel@tonic-gate first_nonrelocatable 13530Sstevel@tonic-gate = sbase; 13540Sstevel@tonic-gate done_first_nonreloc = 1; 13550Sstevel@tonic-gate } 13560Sstevel@tonic-gate mqp->last_nonrelocatable = 13570Sstevel@tonic-gate sbase; 13580Sstevel@tonic-gate } 13590Sstevel@tonic-gate sbase++; 13600Sstevel@tonic-gate snpgs--; 13610Sstevel@tonic-gate } 13620Sstevel@tonic-gate } 13630Sstevel@tonic-gate if (seg != NULL) { 13640Sstevel@tonic-gate ASSERT(mseg_start <= sbase); 13650Sstevel@tonic-gate if (seg->pages_base != mseg_start && 13660Sstevel@tonic-gate seg->pages_base > sbase) { 13670Sstevel@tonic-gate pgcnt_t skip_pgs; 13680Sstevel@tonic-gate 13690Sstevel@tonic-gate /* 13700Sstevel@tonic-gate * Skip the page_t area of a 13710Sstevel@tonic-gate * dynamic memseg. 13720Sstevel@tonic-gate */ 13730Sstevel@tonic-gate skip_pgs = seg->pages_base - sbase; 13740Sstevel@tonic-gate if (snpgs <= skip_pgs) { 13750Sstevel@tonic-gate sbase += snpgs; 13760Sstevel@tonic-gate snpgs = 0; 13770Sstevel@tonic-gate continue; 13780Sstevel@tonic-gate } 13790Sstevel@tonic-gate snpgs -= skip_pgs; 13800Sstevel@tonic-gate sbase += skip_pgs; 13810Sstevel@tonic-gate } 13820Sstevel@tonic-gate ASSERT(snpgs != 0); 13830Sstevel@tonic-gate ASSERT(seg->pages_base <= sbase); 13840Sstevel@tonic-gate /* 13850Sstevel@tonic-gate * The individual pages can now be checked. 13860Sstevel@tonic-gate */ 13870Sstevel@tonic-gate for (pp = seg->pages + 13880Sstevel@tonic-gate (sbase - seg->pages_base); 13890Sstevel@tonic-gate snpgs != 0 && pp < seg->epages; pp++) { 13900Sstevel@tonic-gate mqp->managed++; 13910Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 13920Sstevel@tonic-gate mqp->nonrelocatable++; 13930Sstevel@tonic-gate if (!done_first_nonreloc) { 13940Sstevel@tonic-gate mqp-> 13950Sstevel@tonic-gate first_nonrelocatable 13960Sstevel@tonic-gate = sbase; 13970Sstevel@tonic-gate done_first_nonreloc = 1; 13980Sstevel@tonic-gate } 13990Sstevel@tonic-gate mqp->last_nonrelocatable = 14000Sstevel@tonic-gate sbase; 14010Sstevel@tonic-gate } 14020Sstevel@tonic-gate sbase++; 14030Sstevel@tonic-gate snpgs--; 14040Sstevel@tonic-gate } 14050Sstevel@tonic-gate } 14060Sstevel@tonic-gate } 14070Sstevel@tonic-gate } 14080Sstevel@tonic-gate 14090Sstevel@tonic-gate free_delspans(mdsp_new); 14100Sstevel@tonic-gate 14110Sstevel@tonic-gate return (KPHYSM_OK); 14120Sstevel@tonic-gate } 14130Sstevel@tonic-gate 14140Sstevel@tonic-gate /* 14150Sstevel@tonic-gate * This release function can be called at any stage as follows: 14160Sstevel@tonic-gate * _gethandle only called 14170Sstevel@tonic-gate * _span(s) only called 14180Sstevel@tonic-gate * _start called but failed 14190Sstevel@tonic-gate * delete thread exited 14200Sstevel@tonic-gate */ 14210Sstevel@tonic-gate int 14220Sstevel@tonic-gate kphysm_del_release(memhandle_t handle) 14230Sstevel@tonic-gate { 14240Sstevel@tonic-gate struct mem_handle *mhp; 14250Sstevel@tonic-gate 14260Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 14270Sstevel@tonic-gate if (mhp == NULL) { 14280Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14290Sstevel@tonic-gate } 14300Sstevel@tonic-gate switch (mhp->mh_state) { 
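	/*
	 * Handle states form a simple cycle (a sketch assembled from
	 * the transitions in this file):
	 *
	 *	FREE -> INIT -> STARTING -> RUNNING -> DONE -> RELEASE
	 *	  ^       |                                       |
	 *	  |       +---------------------------------------+
	 *	  +-----------------------------------------------+
	 *
	 * Both INIT and DONE may be released directly; RELEASE always
	 * returns the handle to FREE.
	 */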
14310Sstevel@tonic-gate case MHND_STARTING: 14320Sstevel@tonic-gate case MHND_RUNNING: 14330Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14340Sstevel@tonic-gate return (KPHYSM_ENOTFINISHED); 14350Sstevel@tonic-gate case MHND_FREE: 14360Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 14370Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14380Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14390Sstevel@tonic-gate case MHND_INIT: 14400Sstevel@tonic-gate break; 14410Sstevel@tonic-gate case MHND_DONE: 14420Sstevel@tonic-gate break; 14430Sstevel@tonic-gate case MHND_RELEASE: 14440Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14450Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 14460Sstevel@tonic-gate default: 14470Sstevel@tonic-gate #ifdef DEBUG 14480Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d", 14490Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 14500Sstevel@tonic-gate #endif /* DEBUG */ 14510Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14520Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14530Sstevel@tonic-gate } 14540Sstevel@tonic-gate /* 14550Sstevel@tonic-gate * Set state so that we can wait if necessary. 14560Sstevel@tonic-gate * Also this means that we have read/write access to all 14570Sstevel@tonic-gate * fields except mh_exthandle and mh_state. 14580Sstevel@tonic-gate */ 14590Sstevel@tonic-gate mhp->mh_state = MHND_RELEASE; 14600Sstevel@tonic-gate /* 14610Sstevel@tonic-gate * The mem_handle cannot be de-allocated by any other operation 14620Sstevel@tonic-gate * now, so no need to hold mh_mutex. 14630Sstevel@tonic-gate */ 14640Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14650Sstevel@tonic-gate 14660Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, 0, 0); 14670Sstevel@tonic-gate mhp->mh_phys_pages = 0; 14680Sstevel@tonic-gate mhp->mh_vm_pages = 0; 14690Sstevel@tonic-gate mhp->mh_hold_todo = 0; 14700Sstevel@tonic-gate mhp->mh_delete_complete = NULL; 14710Sstevel@tonic-gate mhp->mh_delete_complete_arg = NULL; 14720Sstevel@tonic-gate mhp->mh_cancel = 0; 14730Sstevel@tonic-gate 14740Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 14750Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_RELEASE); 14760Sstevel@tonic-gate mhp->mh_state = MHND_FREE; 14770Sstevel@tonic-gate 14780Sstevel@tonic-gate kphysm_free_mem_handle(mhp); 14790Sstevel@tonic-gate 14800Sstevel@tonic-gate return (KPHYSM_OK); 14810Sstevel@tonic-gate } 14820Sstevel@tonic-gate 14830Sstevel@tonic-gate /* 14840Sstevel@tonic-gate * This cancel function can only be called with the thread running. 14850Sstevel@tonic-gate */ 14860Sstevel@tonic-gate int 14870Sstevel@tonic-gate kphysm_del_cancel(memhandle_t handle) 14880Sstevel@tonic-gate { 14890Sstevel@tonic-gate struct mem_handle *mhp; 14900Sstevel@tonic-gate 14910Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 14920Sstevel@tonic-gate if (mhp == NULL) { 14930Sstevel@tonic-gate return (KPHYSM_EHANDLE); 14940Sstevel@tonic-gate } 14950Sstevel@tonic-gate if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) { 14960Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 14970Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING); 14980Sstevel@tonic-gate } 14990Sstevel@tonic-gate /* 15000Sstevel@tonic-gate * Set the cancel flag and wake the delete thread up. 15010Sstevel@tonic-gate * The thread may be waiting on I/O, so the effect of the cancel 15020Sstevel@tonic-gate * may be delayed. 
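	 * (delete_memory_thread() only tests mh_cancel at the top of
	 * its scan loops, so a pass blocked in, say, VOP_PUTPAGE()
	 * completes before the cancellation is observed.)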
15030Sstevel@tonic-gate */ 15040Sstevel@tonic-gate if (mhp->mh_cancel == 0) { 15050Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ECANCELLED; 15060Sstevel@tonic-gate cv_signal(&mhp->mh_cv); 15070Sstevel@tonic-gate } 15080Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15090Sstevel@tonic-gate return (KPHYSM_OK); 15100Sstevel@tonic-gate } 15110Sstevel@tonic-gate 15120Sstevel@tonic-gate int 15130Sstevel@tonic-gate kphysm_del_status( 15140Sstevel@tonic-gate memhandle_t handle, 15150Sstevel@tonic-gate memdelstat_t *mdstp) 15160Sstevel@tonic-gate { 15170Sstevel@tonic-gate struct mem_handle *mhp; 15180Sstevel@tonic-gate 15190Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 15200Sstevel@tonic-gate if (mhp == NULL) { 15210Sstevel@tonic-gate return (KPHYSM_EHANDLE); 15220Sstevel@tonic-gate } 15230Sstevel@tonic-gate /* 15240Sstevel@tonic-gate * Calling kphysm_del_status() is allowed before the delete 15250Sstevel@tonic-gate * is started to allow for status display. 15260Sstevel@tonic-gate */ 15270Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING && 15280Sstevel@tonic-gate mhp->mh_state != MHND_RUNNING) { 15290Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15300Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING); 15310Sstevel@tonic-gate } 15320Sstevel@tonic-gate mdstp->phys_pages = mhp->mh_phys_pages; 15330Sstevel@tonic-gate mdstp->managed = mhp->mh_vm_pages; 15340Sstevel@tonic-gate mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo; 15350Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 15360Sstevel@tonic-gate return (KPHYSM_OK); 15370Sstevel@tonic-gate } 15380Sstevel@tonic-gate 15390Sstevel@tonic-gate static int mem_delete_additional_pages = 100; 15400Sstevel@tonic-gate 15410Sstevel@tonic-gate static int 15420Sstevel@tonic-gate can_remove_pgs(pgcnt_t npgs) 15430Sstevel@tonic-gate { 15440Sstevel@tonic-gate /* 15450Sstevel@tonic-gate * If all pageable pages were paged out, freemem would 15460Sstevel@tonic-gate * equal availrmem. There is a minimum requirement for 15470Sstevel@tonic-gate * availrmem. 15480Sstevel@tonic-gate */ 15490Sstevel@tonic-gate if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages)) 15500Sstevel@tonic-gate < npgs) 15510Sstevel@tonic-gate return (0); 15520Sstevel@tonic-gate /* TODO: check swap space, etc. 
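	 * The test above is purely an availrmem calculation; as an
	 * illustration, with availrmem == 10000, tune.t_minarmem == 25
	 * and mem_delete_additional_pages == 100, deletes of up to
	 * 9875 pages would be allowed.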
*/ 15530Sstevel@tonic-gate return (1); 15540Sstevel@tonic-gate } 15550Sstevel@tonic-gate 15560Sstevel@tonic-gate static int 15570Sstevel@tonic-gate get_availrmem(pgcnt_t npgs) 15580Sstevel@tonic-gate { 15590Sstevel@tonic-gate int ret; 15600Sstevel@tonic-gate 15610Sstevel@tonic-gate mutex_enter(&freemem_lock); 15620Sstevel@tonic-gate ret = can_remove_pgs(npgs); 15630Sstevel@tonic-gate if (ret != 0) 15640Sstevel@tonic-gate availrmem -= npgs; 15650Sstevel@tonic-gate mutex_exit(&freemem_lock); 15660Sstevel@tonic-gate return (ret); 15670Sstevel@tonic-gate } 15680Sstevel@tonic-gate 15690Sstevel@tonic-gate static void 15700Sstevel@tonic-gate put_availrmem(pgcnt_t npgs) 15710Sstevel@tonic-gate { 15720Sstevel@tonic-gate mutex_enter(&freemem_lock); 15730Sstevel@tonic-gate availrmem += npgs; 15740Sstevel@tonic-gate mutex_exit(&freemem_lock); 15750Sstevel@tonic-gate } 15760Sstevel@tonic-gate 15770Sstevel@tonic-gate #define FREEMEM_INCR 100 15780Sstevel@tonic-gate static pgcnt_t freemem_incr = FREEMEM_INCR; 15790Sstevel@tonic-gate #define DEL_FREE_WAIT_FRAC 4 15800Sstevel@tonic-gate #define DEL_FREE_WAIT_TICKS ((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC) 15810Sstevel@tonic-gate 15820Sstevel@tonic-gate #define DEL_BUSY_WAIT_FRAC 20 15830Sstevel@tonic-gate #define DEL_BUSY_WAIT_TICKS ((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC) 15840Sstevel@tonic-gate 15850Sstevel@tonic-gate static void kphysm_del_cleanup(struct mem_handle *); 15860Sstevel@tonic-gate 15870Sstevel@tonic-gate static void page_delete_collect(page_t *, struct mem_handle *); 15880Sstevel@tonic-gate 15890Sstevel@tonic-gate static pgcnt_t 15900Sstevel@tonic-gate delthr_get_freemem(struct mem_handle *mhp) 15910Sstevel@tonic-gate { 15920Sstevel@tonic-gate pgcnt_t free_get; 15930Sstevel@tonic-gate int ret; 15940Sstevel@tonic-gate 15950Sstevel@tonic-gate ASSERT(MUTEX_HELD(&mhp->mh_mutex)); 15960Sstevel@tonic-gate 15970Sstevel@tonic-gate MDSTAT_INCR(mhp, need_free); 15980Sstevel@tonic-gate /* 15990Sstevel@tonic-gate * Get up to freemem_incr pages. 16000Sstevel@tonic-gate */ 16010Sstevel@tonic-gate free_get = freemem_incr; 16020Sstevel@tonic-gate if (free_get > mhp->mh_hold_todo) 16030Sstevel@tonic-gate free_get = mhp->mh_hold_todo; 16040Sstevel@tonic-gate /* 16050Sstevel@tonic-gate * Take free_get pages away from freemem, 16060Sstevel@tonic-gate * waiting if necessary. 16070Sstevel@tonic-gate */ 16080Sstevel@tonic-gate 16090Sstevel@tonic-gate while (!mhp->mh_cancel) { 16100Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 16110Sstevel@tonic-gate MDSTAT_INCR(mhp, free_loop); 16120Sstevel@tonic-gate /* 16130Sstevel@tonic-gate * Duplicate test from page_create_throttle() 16140Sstevel@tonic-gate * but don't override with !PG_WAIT. 16150Sstevel@tonic-gate */ 16160Sstevel@tonic-gate if (freemem < (free_get + throttlefree)) { 16170Sstevel@tonic-gate MDSTAT_INCR(mhp, free_low); 16180Sstevel@tonic-gate ret = 0; 16190Sstevel@tonic-gate } else { 16200Sstevel@tonic-gate ret = page_create_wait(free_get, 0); 16210Sstevel@tonic-gate if (ret == 0) { 16220Sstevel@tonic-gate /* EMPTY */ 16230Sstevel@tonic-gate MDSTAT_INCR(mhp, free_failed); 16240Sstevel@tonic-gate } 16250Sstevel@tonic-gate } 16260Sstevel@tonic-gate if (ret != 0) { 16270Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16280Sstevel@tonic-gate return (free_get); 16290Sstevel@tonic-gate } 16300Sstevel@tonic-gate 16310Sstevel@tonic-gate /* 16320Sstevel@tonic-gate * Put pressure on pageout. 
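		 * page_needfree() advertises the shortfall to the
		 * pageout scanner and the cv_signal() below wakes it;
		 * we then sleep for up to DEL_FREE_WAIT_TICKS before
		 * re-evaluating. With hz == 100, for example, that is
		 * (100 + 3) / 4 == 25 ticks, i.e. roughly a quarter
		 * of a second per retry.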
16330Sstevel@tonic-gate */ 16340Sstevel@tonic-gate page_needfree(free_get); 16350Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 16360Sstevel@tonic-gate 16370Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 163811066Srafael.vanoni@sun.com (void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex, 163911066Srafael.vanoni@sun.com DEL_FREE_WAIT_TICKS, TR_CLOCK_TICK); 16400Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 16410Sstevel@tonic-gate page_needfree(-(spgcnt_t)free_get); 16420Sstevel@tonic-gate 16430Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 16440Sstevel@tonic-gate } 16450Sstevel@tonic-gate return (0); 16460Sstevel@tonic-gate } 16470Sstevel@tonic-gate 16480Sstevel@tonic-gate #define DR_AIO_CLEANUP_DELAY 25000 /* 0.025secs, in usec */ 16490Sstevel@tonic-gate #define DR_AIO_CLEANUP_MAXLOOPS_NODELAY 100 16500Sstevel@tonic-gate /* 16510Sstevel@tonic-gate * This function is run as a helper thread for delete_memory_thread. 16520Sstevel@tonic-gate * It is needed in order to force kaio cleanup, so that pages used in kaio 16530Sstevel@tonic-gate * will be unlocked and subsequently relocated by delete_memory_thread. 16540Sstevel@tonic-gate * The address of the delete_memory_threads's mem_handle is passed in to 16550Sstevel@tonic-gate * this thread function, and is used to set the mh_aio_cleanup_done member 16560Sstevel@tonic-gate * prior to calling thread_exit(). 16570Sstevel@tonic-gate */ 16580Sstevel@tonic-gate static void 16590Sstevel@tonic-gate dr_aio_cleanup_thread(caddr_t amhp) 16600Sstevel@tonic-gate { 16610Sstevel@tonic-gate proc_t *procp; 16620Sstevel@tonic-gate int (*aio_cleanup_dr_delete_memory)(proc_t *); 16630Sstevel@tonic-gate int cleaned; 16640Sstevel@tonic-gate int n = 0; 16650Sstevel@tonic-gate struct mem_handle *mhp; 16660Sstevel@tonic-gate volatile uint_t *pcancel; 16670Sstevel@tonic-gate 16680Sstevel@tonic-gate mhp = (struct mem_handle *)amhp; 16690Sstevel@tonic-gate ASSERT(mhp != NULL); 16700Sstevel@tonic-gate pcancel = &mhp->mh_dr_aio_cleanup_cancel; 16710Sstevel@tonic-gate if (modload("sys", "kaio") == -1) { 16720Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 16730Sstevel@tonic-gate cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio"); 16740Sstevel@tonic-gate thread_exit(); 16750Sstevel@tonic-gate } 16760Sstevel@tonic-gate aio_cleanup_dr_delete_memory = (int (*)(proc_t *)) 16770Sstevel@tonic-gate modgetsymvalue("aio_cleanup_dr_delete_memory", 0); 16780Sstevel@tonic-gate if (aio_cleanup_dr_delete_memory == NULL) { 16790Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 16800Sstevel@tonic-gate cmn_err(CE_WARN, 16810Sstevel@tonic-gate "aio_cleanup_dr_delete_memory not found in kaio"); 16820Sstevel@tonic-gate thread_exit(); 16830Sstevel@tonic-gate } 16840Sstevel@tonic-gate do { 16850Sstevel@tonic-gate cleaned = 0; 16860Sstevel@tonic-gate mutex_enter(&pidlock); 16870Sstevel@tonic-gate for (procp = practive; (*pcancel == 0) && (procp != NULL); 16880Sstevel@tonic-gate procp = procp->p_next) { 16890Sstevel@tonic-gate mutex_enter(&procp->p_lock); 16900Sstevel@tonic-gate if (procp->p_aio != NULL) { 16910Sstevel@tonic-gate /* cleanup proc's outstanding kaio */ 16920Sstevel@tonic-gate cleaned += 16930Sstevel@tonic-gate (*aio_cleanup_dr_delete_memory)(procp); 16940Sstevel@tonic-gate } 16950Sstevel@tonic-gate mutex_exit(&procp->p_lock); 16960Sstevel@tonic-gate } 16970Sstevel@tonic-gate mutex_exit(&pidlock); 16980Sstevel@tonic-gate if ((*pcancel == 0) && 16990Sstevel@tonic-gate (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) { 17000Sstevel@tonic-gate /* delay a bit before 
retrying all procs again */ 17010Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY)); 17020Sstevel@tonic-gate n = 0; 17030Sstevel@tonic-gate } 17040Sstevel@tonic-gate } while (*pcancel == 0); 17050Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1; 17060Sstevel@tonic-gate thread_exit(); 17070Sstevel@tonic-gate } 17080Sstevel@tonic-gate 17090Sstevel@tonic-gate static void 17100Sstevel@tonic-gate delete_memory_thread(caddr_t amhp) 17110Sstevel@tonic-gate { 17120Sstevel@tonic-gate struct mem_handle *mhp; 17130Sstevel@tonic-gate struct memdelspan *mdsp; 17140Sstevel@tonic-gate callb_cpr_t cprinfo; 17150Sstevel@tonic-gate page_t *pp_targ; 17160Sstevel@tonic-gate spgcnt_t freemem_left; 17170Sstevel@tonic-gate void (*del_complete_funcp)(void *, int error); 17180Sstevel@tonic-gate void *del_complete_arg; 17190Sstevel@tonic-gate int comp_code; 17200Sstevel@tonic-gate int ret; 17210Sstevel@tonic-gate int first_scan; 17220Sstevel@tonic-gate uint_t szc; 17230Sstevel@tonic-gate #ifdef MEM_DEL_STATS 17240Sstevel@tonic-gate uint64_t start_total, ntick_total; 17250Sstevel@tonic-gate uint64_t start_pgrp, ntick_pgrp; 17260Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 17270Sstevel@tonic-gate 17280Sstevel@tonic-gate mhp = (struct mem_handle *)amhp; 17290Sstevel@tonic-gate 17300Sstevel@tonic-gate #ifdef MEM_DEL_STATS 17310Sstevel@tonic-gate start_total = ddi_get_lbolt(); 17320Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 17330Sstevel@tonic-gate 17340Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex, 17350Sstevel@tonic-gate callb_generic_cpr, "memdel"); 17360Sstevel@tonic-gate 17370Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17380Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_STARTING); 17390Sstevel@tonic-gate 17400Sstevel@tonic-gate mhp->mh_state = MHND_RUNNING; 17410Sstevel@tonic-gate mhp->mh_thread_id = curthread; 17420Sstevel@tonic-gate 17430Sstevel@tonic-gate mhp->mh_hold_todo = mhp->mh_vm_pages; 17440Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 17450Sstevel@tonic-gate 17460Sstevel@tonic-gate /* Allocate the remap pages now, if necessary. */ 17470Sstevel@tonic-gate memseg_remap_init(); 17480Sstevel@tonic-gate 17490Sstevel@tonic-gate /* 17500Sstevel@tonic-gate * Subtract from availrmem now if possible as availrmem 17510Sstevel@tonic-gate * may not be available by the end of the delete. 
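	 * get_availrmem() tests and debits availrmem atomically under
	 * freemem_lock; if the delete is later refused or cancelled
	 * the debit is undone with put_availrmem().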
17520Sstevel@tonic-gate */ 17530Sstevel@tonic-gate if (!get_availrmem(mhp->mh_vm_pages)) { 17540Sstevel@tonic-gate comp_code = KPHYSM_ENOTVIABLE; 17550Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17560Sstevel@tonic-gate goto early_exit; 17570Sstevel@tonic-gate } 17580Sstevel@tonic-gate 17590Sstevel@tonic-gate ret = kphysm_setup_pre_del(mhp->mh_vm_pages); 17600Sstevel@tonic-gate 17610Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 17620Sstevel@tonic-gate 17630Sstevel@tonic-gate if (ret != 0) { 17640Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_EREFUSED; 17650Sstevel@tonic-gate goto refused; 17660Sstevel@tonic-gate } 17670Sstevel@tonic-gate 17680Sstevel@tonic-gate transit_list_collect(mhp, 1); 17690Sstevel@tonic-gate 17700Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 17710Sstevel@tonic-gate mdsp = mdsp->mds_next) { 17720Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap == NULL); 17730Sstevel@tonic-gate mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP); 17740Sstevel@tonic-gate mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp), 17756242Smb158278 KM_SLEEP); 17760Sstevel@tonic-gate } 17770Sstevel@tonic-gate 17780Sstevel@tonic-gate first_scan = 1; 17790Sstevel@tonic-gate freemem_left = 0; 17800Sstevel@tonic-gate /* 17810Sstevel@tonic-gate * Start dr_aio_cleanup_thread, which periodically iterates 17820Sstevel@tonic-gate * through the process list and invokes aio cleanup. This 17830Sstevel@tonic-gate * is needed in order to avoid a deadly embrace between the 17840Sstevel@tonic-gate * delete_memory_thread (waiting on writer lock for page, with the 17850Sstevel@tonic-gate * exclusive-wanted bit set), kaio read request threads (waiting for a 17860Sstevel@tonic-gate * reader lock on the same page that is wanted by the 17870Sstevel@tonic-gate * delete_memory_thread), and threads waiting for kaio completion 17880Sstevel@tonic-gate * (blocked on spt_amp->lock). 
17890Sstevel@tonic-gate */ 17900Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 0; 17910Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 0; 17920Sstevel@tonic-gate (void) thread_create(NULL, 0, dr_aio_cleanup_thread, 17930Sstevel@tonic-gate (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1); 17940Sstevel@tonic-gate while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) { 17950Sstevel@tonic-gate pgcnt_t collected; 17960Sstevel@tonic-gate 17970Sstevel@tonic-gate MDSTAT_INCR(mhp, nloop); 17980Sstevel@tonic-gate collected = 0; 17990Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) && 18000Sstevel@tonic-gate (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) { 18010Sstevel@tonic-gate pfn_t pfn, p_end; 18020Sstevel@tonic-gate 18030Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 18040Sstevel@tonic-gate for (pfn = mdsp->mds_base; (pfn < p_end) && 18050Sstevel@tonic-gate (mhp->mh_cancel == 0); pfn++) { 18060Sstevel@tonic-gate page_t *pp, *tpp, *tpp_targ; 18070Sstevel@tonic-gate pgcnt_t bit; 18080Sstevel@tonic-gate struct vnode *vp; 18090Sstevel@tonic-gate u_offset_t offset; 18100Sstevel@tonic-gate int mod, result; 18110Sstevel@tonic-gate spgcnt_t pgcnt; 18120Sstevel@tonic-gate 18130Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 18140Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] & 18150Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) { 18160Sstevel@tonic-gate MDSTAT_INCR(mhp, already_done); 18170Sstevel@tonic-gate continue; 18180Sstevel@tonic-gate } 18190Sstevel@tonic-gate if (freemem_left == 0) { 18200Sstevel@tonic-gate freemem_left += delthr_get_freemem(mhp); 18210Sstevel@tonic-gate if (freemem_left == 0) 18220Sstevel@tonic-gate break; 18230Sstevel@tonic-gate } 18240Sstevel@tonic-gate 18250Sstevel@tonic-gate /* 18260Sstevel@tonic-gate * Release mh_mutex - some of this 18270Sstevel@tonic-gate * stuff takes some time (eg PUTPAGE). 18280Sstevel@tonic-gate */ 18290Sstevel@tonic-gate 18300Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 18310Sstevel@tonic-gate MDSTAT_INCR(mhp, ncheck); 18320Sstevel@tonic-gate 18330Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 18340Sstevel@tonic-gate if (pp == NULL) { 18350Sstevel@tonic-gate /* 18360Sstevel@tonic-gate * Not covered by a page_t - will 18370Sstevel@tonic-gate * be dealt with elsewhere. 18380Sstevel@tonic-gate */ 18390Sstevel@tonic-gate MDSTAT_INCR(mhp, nopaget); 18400Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18410Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 18420Sstevel@tonic-gate (1 << (bit % NBPBMW)); 18430Sstevel@tonic-gate continue; 18440Sstevel@tonic-gate } 18450Sstevel@tonic-gate 18460Sstevel@tonic-gate if (!page_try_reclaim_lock(pp, SE_EXCL, 1847917Selowe SE_EXCL_WANTED | SE_RETIRED)) { 1848917Selowe /* 1849917Selowe * Page in use elsewhere. Skip it. 1850917Selowe */ 1851917Selowe MDSTAT_INCR(mhp, lockfail); 1852917Selowe mutex_enter(&mhp->mh_mutex); 1853917Selowe continue; 18540Sstevel@tonic-gate } 18550Sstevel@tonic-gate /* 18560Sstevel@tonic-gate * See if the cage expanded into the delete. 18570Sstevel@tonic-gate * This can happen as we have to allow the 18580Sstevel@tonic-gate * cage to expand. 
18590Sstevel@tonic-gate */ 18600Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 1861917Selowe page_unlock(pp); 18620Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18630Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ENONRELOC; 18640Sstevel@tonic-gate break; 18650Sstevel@tonic-gate } 1866917Selowe if (PP_RETIRED(pp)) { 18670Sstevel@tonic-gate /* 18680Sstevel@tonic-gate * Page has been retired and is 18690Sstevel@tonic-gate * not part of the cage so we 18700Sstevel@tonic-gate * can now do the accounting for 18710Sstevel@tonic-gate * it. 18720Sstevel@tonic-gate */ 18730Sstevel@tonic-gate MDSTAT_INCR(mhp, retired); 18740Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 18750Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] 18760Sstevel@tonic-gate |= (1 << (bit % NBPBMW)); 18770Sstevel@tonic-gate mdsp->mds_bitmap_retired[bit / 18780Sstevel@tonic-gate NBPBMW] |= 18790Sstevel@tonic-gate (1 << (bit % NBPBMW)); 18800Sstevel@tonic-gate mhp->mh_hold_todo--; 18810Sstevel@tonic-gate continue; 18820Sstevel@tonic-gate } 18830Sstevel@tonic-gate ASSERT(freemem_left != 0); 18840Sstevel@tonic-gate if (PP_ISFREE(pp)) { 18850Sstevel@tonic-gate /* 18860Sstevel@tonic-gate * Like page_reclaim() only 'freemem' 18870Sstevel@tonic-gate * processing is already done. 18880Sstevel@tonic-gate */ 18890Sstevel@tonic-gate MDSTAT_INCR(mhp, nfree); 18900Sstevel@tonic-gate free_page_collect: 18910Sstevel@tonic-gate if (PP_ISAGED(pp)) { 18920Sstevel@tonic-gate page_list_sub(pp, 18930Sstevel@tonic-gate PG_FREE_LIST); 18940Sstevel@tonic-gate } else { 18950Sstevel@tonic-gate page_list_sub(pp, 18960Sstevel@tonic-gate PG_CACHE_LIST); 18970Sstevel@tonic-gate } 18980Sstevel@tonic-gate PP_CLRFREE(pp); 18990Sstevel@tonic-gate PP_CLRAGED(pp); 19000Sstevel@tonic-gate collected++; 19010Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19020Sstevel@tonic-gate page_delete_collect(pp, mhp); 19030Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 19040Sstevel@tonic-gate (1 << (bit % NBPBMW)); 19050Sstevel@tonic-gate freemem_left--; 19060Sstevel@tonic-gate continue; 19070Sstevel@tonic-gate } 19080Sstevel@tonic-gate ASSERT(pp->p_vnode != NULL); 19090Sstevel@tonic-gate if (first_scan) { 19100Sstevel@tonic-gate MDSTAT_INCR(mhp, first_notfree); 19110Sstevel@tonic-gate page_unlock(pp); 19120Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19130Sstevel@tonic-gate continue; 19140Sstevel@tonic-gate } 19150Sstevel@tonic-gate /* 19160Sstevel@tonic-gate * Keep stats on pages encountered that 1917917Selowe * are marked for retirement. 19180Sstevel@tonic-gate */ 1919917Selowe if (PP_TOXIC(pp)) { 19200Sstevel@tonic-gate MDSTAT_INCR(mhp, toxic); 1921917Selowe } else if (PP_PR_REQ(pp)) { 19220Sstevel@tonic-gate MDSTAT_INCR(mhp, failing); 19230Sstevel@tonic-gate } 19240Sstevel@tonic-gate /* 19250Sstevel@tonic-gate * In certain cases below, special exceptions 19260Sstevel@tonic-gate * are made for pages that are toxic. This 19270Sstevel@tonic-gate * is because the current meaning of toxic 19280Sstevel@tonic-gate * is that an uncorrectable error has been 19290Sstevel@tonic-gate * previously associated with the page. 19300Sstevel@tonic-gate */ 19310Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) { 1932917Selowe if (!PP_TOXIC(pp)) { 19330Sstevel@tonic-gate /* 19340Sstevel@tonic-gate * Must relocate locked in 19350Sstevel@tonic-gate * memory pages. 
19360Sstevel@tonic-gate */ 19370Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19380Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 19390Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19400Sstevel@tonic-gate /* 19410Sstevel@tonic-gate * Lock all constituent pages 19420Sstevel@tonic-gate * of a large page to ensure 19430Sstevel@tonic-gate * that p_szc won't change. 19440Sstevel@tonic-gate */ 19450Sstevel@tonic-gate if (!group_page_trylock(pp, 19460Sstevel@tonic-gate SE_EXCL)) { 19470Sstevel@tonic-gate MDSTAT_INCR(mhp, 19480Sstevel@tonic-gate gptllckfail); 19490Sstevel@tonic-gate page_unlock(pp); 19500Sstevel@tonic-gate mutex_enter( 19510Sstevel@tonic-gate &mhp->mh_mutex); 19520Sstevel@tonic-gate continue; 19530Sstevel@tonic-gate } 19540Sstevel@tonic-gate MDSTAT_INCR(mhp, npplocked); 19550Sstevel@tonic-gate pp_targ = 19560Sstevel@tonic-gate page_get_replacement_page( 19576242Smb158278 pp, NULL, 0); 19580Sstevel@tonic-gate if (pp_targ != NULL) { 19590Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19600Sstevel@tonic-gate ntick_pgrp = 19610Sstevel@tonic-gate (uint64_t) 19620Sstevel@tonic-gate ddi_get_lbolt() - 19630Sstevel@tonic-gate start_pgrp; 19640Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19650Sstevel@tonic-gate MDSTAT_PGRP(mhp, 19660Sstevel@tonic-gate ntick_pgrp); 19670Sstevel@tonic-gate MDSTAT_INCR(mhp, 19680Sstevel@tonic-gate nlockreloc); 19690Sstevel@tonic-gate goto reloc; 19700Sstevel@tonic-gate } 19710Sstevel@tonic-gate group_page_unlock(pp); 19720Sstevel@tonic-gate page_unlock(pp); 19730Sstevel@tonic-gate #ifdef MEM_DEL_STATS 19740Sstevel@tonic-gate ntick_pgrp = 19750Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() - 19760Sstevel@tonic-gate start_pgrp; 19770Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 19780Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 19790Sstevel@tonic-gate MDSTAT_INCR(mhp, nnorepl); 19800Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19810Sstevel@tonic-gate continue; 19820Sstevel@tonic-gate } else { 19830Sstevel@tonic-gate /* 19840Sstevel@tonic-gate * Cannot do anything about 19850Sstevel@tonic-gate * this page because it is 19860Sstevel@tonic-gate * toxic. 19870Sstevel@tonic-gate */ 19880Sstevel@tonic-gate MDSTAT_INCR(mhp, npplkdtoxic); 19890Sstevel@tonic-gate page_unlock(pp); 19900Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 19910Sstevel@tonic-gate continue; 19920Sstevel@tonic-gate } 19930Sstevel@tonic-gate } 19940Sstevel@tonic-gate /* 19950Sstevel@tonic-gate * Unload the mappings and check if mod bit 19960Sstevel@tonic-gate * is set. 19970Sstevel@tonic-gate */ 19983290Sjohansen ASSERT(!PP_ISKAS(pp)); 19990Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 20000Sstevel@tonic-gate mod = hat_ismod(pp); 20010Sstevel@tonic-gate 20020Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20030Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 20040Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 2005917Selowe if (mod && !PP_TOXIC(pp)) { 20060Sstevel@tonic-gate /* 20070Sstevel@tonic-gate * Lock all constituent pages 20080Sstevel@tonic-gate * of a large page to ensure 20090Sstevel@tonic-gate * that p_szc won't change. 
20100Sstevel@tonic-gate */ 20110Sstevel@tonic-gate if (!group_page_trylock(pp, SE_EXCL)) { 20120Sstevel@tonic-gate MDSTAT_INCR(mhp, gptlmodfail); 20130Sstevel@tonic-gate page_unlock(pp); 20140Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20150Sstevel@tonic-gate continue; 20160Sstevel@tonic-gate } 20170Sstevel@tonic-gate pp_targ = page_get_replacement_page(pp, 20180Sstevel@tonic-gate NULL, 0); 20190Sstevel@tonic-gate if (pp_targ != NULL) { 20200Sstevel@tonic-gate MDSTAT_INCR(mhp, nmodreloc); 20210Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20220Sstevel@tonic-gate ntick_pgrp = 20230Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() - 20246242Smb158278 start_pgrp; 20250Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20260Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20270Sstevel@tonic-gate goto reloc; 20280Sstevel@tonic-gate } 20290Sstevel@tonic-gate group_page_unlock(pp); 20300Sstevel@tonic-gate } 20310Sstevel@tonic-gate 20320Sstevel@tonic-gate if (!page_try_demote_pages(pp)) { 20330Sstevel@tonic-gate MDSTAT_INCR(mhp, demotefail); 20340Sstevel@tonic-gate page_unlock(pp); 20350Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20360Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20370Sstevel@tonic-gate start_pgrp; 20380Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20390Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20400Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20410Sstevel@tonic-gate continue; 20420Sstevel@tonic-gate } 20430Sstevel@tonic-gate 20440Sstevel@tonic-gate /* 20450Sstevel@tonic-gate * Regular 'page-out'. 20460Sstevel@tonic-gate */ 20470Sstevel@tonic-gate if (!mod) { 20480Sstevel@tonic-gate MDSTAT_INCR(mhp, ndestroy); 20490Sstevel@tonic-gate page_destroy(pp, 1); 20500Sstevel@tonic-gate /* 20510Sstevel@tonic-gate * page_destroy was called with 20520Sstevel@tonic-gate * dontfree. As long as p_lckcnt 20530Sstevel@tonic-gate * and p_cowcnt are both zero, the 20540Sstevel@tonic-gate * only additional action of 20550Sstevel@tonic-gate * page_destroy with !dontfree is to 20560Sstevel@tonic-gate * call page_free, so we can collect 20570Sstevel@tonic-gate * the page here. 20580Sstevel@tonic-gate */ 20590Sstevel@tonic-gate collected++; 20600Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20610Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20620Sstevel@tonic-gate start_pgrp; 20630Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20640Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20650Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20660Sstevel@tonic-gate page_delete_collect(pp, mhp); 20670Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 20680Sstevel@tonic-gate (1 << (bit % NBPBMW)); 20690Sstevel@tonic-gate continue; 20700Sstevel@tonic-gate } 20710Sstevel@tonic-gate /* 20720Sstevel@tonic-gate * The page is toxic and the mod bit is 20730Sstevel@tonic-gate * set, we cannot do anything here to deal 20740Sstevel@tonic-gate * with it. 
20750Sstevel@tonic-gate */ 2076917Selowe if (PP_TOXIC(pp)) { 20770Sstevel@tonic-gate page_unlock(pp); 20780Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20790Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20800Sstevel@tonic-gate start_pgrp; 20810Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20820Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 20830Sstevel@tonic-gate MDSTAT_INCR(mhp, modtoxic); 20840Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 20850Sstevel@tonic-gate continue; 20860Sstevel@tonic-gate } 20870Sstevel@tonic-gate MDSTAT_INCR(mhp, nputpage); 20880Sstevel@tonic-gate vp = pp->p_vnode; 20890Sstevel@tonic-gate offset = pp->p_offset; 20900Sstevel@tonic-gate VN_HOLD(vp); 20910Sstevel@tonic-gate page_unlock(pp); 20920Sstevel@tonic-gate (void) VOP_PUTPAGE(vp, offset, PAGESIZE, 20935331Samw B_INVAL|B_FORCE, kcred, NULL); 20940Sstevel@tonic-gate VN_RELE(vp); 20950Sstevel@tonic-gate #ifdef MEM_DEL_STATS 20960Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 20970Sstevel@tonic-gate start_pgrp; 20980Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 20990Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 21000Sstevel@tonic-gate /* 21010Sstevel@tonic-gate * Try to get the page back immediately 21020Sstevel@tonic-gate * so that it can be collected. 21030Sstevel@tonic-gate */ 21040Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 21050Sstevel@tonic-gate if (pp == NULL) { 21060Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim); 21070Sstevel@tonic-gate /* 21080Sstevel@tonic-gate * This should not happen as this 21090Sstevel@tonic-gate * thread is deleting the page. 21100Sstevel@tonic-gate * If this code is generalized, this 21110Sstevel@tonic-gate * becomes a reality. 21120Sstevel@tonic-gate */ 21130Sstevel@tonic-gate #ifdef DEBUG 21140Sstevel@tonic-gate cmn_err(CE_WARN, 21150Sstevel@tonic-gate "delete_memory_thread(0x%p) " 21160Sstevel@tonic-gate "pfn 0x%lx has no page_t", 21170Sstevel@tonic-gate (void *)mhp, pfn); 21180Sstevel@tonic-gate #endif /* DEBUG */ 21190Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21200Sstevel@tonic-gate continue; 21210Sstevel@tonic-gate } 21220Sstevel@tonic-gate if (page_try_reclaim_lock(pp, SE_EXCL, 2123917Selowe SE_EXCL_WANTED | SE_RETIRED)) { 21240Sstevel@tonic-gate if (PP_ISFREE(pp)) { 21250Sstevel@tonic-gate goto free_page_collect; 21260Sstevel@tonic-gate } 21270Sstevel@tonic-gate page_unlock(pp); 21280Sstevel@tonic-gate } 21290Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim); 21300Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21310Sstevel@tonic-gate continue; 21320Sstevel@tonic-gate 21330Sstevel@tonic-gate reloc: 21340Sstevel@tonic-gate /* 21350Sstevel@tonic-gate * Got some freemem and a target 21360Sstevel@tonic-gate * page, so move the data to avoid 21370Sstevel@tonic-gate * I/O and lock problems. 21380Sstevel@tonic-gate */ 21390Sstevel@tonic-gate ASSERT(!page_iolock_assert(pp)); 21400Sstevel@tonic-gate MDSTAT_INCR(mhp, nreloc); 21410Sstevel@tonic-gate /* 21420Sstevel@tonic-gate * page_relocate() will return pgcnt: the 21430Sstevel@tonic-gate * number of consecutive pages relocated. 21440Sstevel@tonic-gate * If it is successful, pp will be a 21450Sstevel@tonic-gate * linked list of the page structs that 21460Sstevel@tonic-gate * were relocated. If page_relocate() is 21470Sstevel@tonic-gate * unsuccessful, pp will be unmodified. 
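				 * For the base page of a four-page large
				 * page (illustrative size), success means
				 * result == 0 and pgcnt == 4, with pp and
				 * pp_targ each returned as four-entry lists
				 * that the loop below unlinks with
				 * page_sub().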
21480Sstevel@tonic-gate */ 21490Sstevel@tonic-gate #ifdef MEM_DEL_STATS 21500Sstevel@tonic-gate start_pgrp = ddi_get_lbolt(); 21510Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 21520Sstevel@tonic-gate result = page_relocate(&pp, &pp_targ, 0, 0, 21530Sstevel@tonic-gate &pgcnt, NULL); 21540Sstevel@tonic-gate #ifdef MEM_DEL_STATS 21550Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() - 21560Sstevel@tonic-gate start_pgrp; 21570Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 21580Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp); 21590Sstevel@tonic-gate if (result != 0) { 21600Sstevel@tonic-gate MDSTAT_INCR(mhp, nrelocfail); 21610Sstevel@tonic-gate /* 21620Sstevel@tonic-gate * We did not succeed. We need 21630Sstevel@tonic-gate * to give the pp_targ pages back. 21640Sstevel@tonic-gate * page_free(pp_targ, 1) without 21650Sstevel@tonic-gate * the freemem accounting. 21660Sstevel@tonic-gate */ 21670Sstevel@tonic-gate group_page_unlock(pp); 21680Sstevel@tonic-gate page_free_replacement_page(pp_targ); 21690Sstevel@tonic-gate page_unlock(pp); 21700Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21710Sstevel@tonic-gate continue; 21720Sstevel@tonic-gate } 21730Sstevel@tonic-gate 21740Sstevel@tonic-gate /* 21750Sstevel@tonic-gate * We will then collect pgcnt pages. 21760Sstevel@tonic-gate */ 21770Sstevel@tonic-gate ASSERT(pgcnt > 0); 21780Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 21790Sstevel@tonic-gate /* 21800Sstevel@tonic-gate * We need to make sure freemem_left is 21810Sstevel@tonic-gate * large enough. 21820Sstevel@tonic-gate */ 21830Sstevel@tonic-gate while ((freemem_left < pgcnt) && 21846242Smb158278 (!mhp->mh_cancel)) { 21850Sstevel@tonic-gate freemem_left += 21866242Smb158278 delthr_get_freemem(mhp); 21870Sstevel@tonic-gate } 21880Sstevel@tonic-gate 21890Sstevel@tonic-gate /* 21900Sstevel@tonic-gate * Do not proceed if mh_cancel is set. 21910Sstevel@tonic-gate */ 21920Sstevel@tonic-gate if (mhp->mh_cancel) { 21930Sstevel@tonic-gate while (pp_targ != NULL) { 21940Sstevel@tonic-gate /* 21950Sstevel@tonic-gate * Unlink and unlock each page. 21960Sstevel@tonic-gate */ 21970Sstevel@tonic-gate tpp_targ = pp_targ; 21980Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ); 21990Sstevel@tonic-gate page_unlock(tpp_targ); 22000Sstevel@tonic-gate } 22010Sstevel@tonic-gate /* 22020Sstevel@tonic-gate * We need to give the pp pages back. 22030Sstevel@tonic-gate * page_free(pp, 1) without the 22040Sstevel@tonic-gate * freemem accounting. 22050Sstevel@tonic-gate */ 22060Sstevel@tonic-gate page_free_replacement_page(pp); 22070Sstevel@tonic-gate break; 22080Sstevel@tonic-gate } 22090Sstevel@tonic-gate 22100Sstevel@tonic-gate /* Now remove pgcnt from freemem_left */ 22110Sstevel@tonic-gate freemem_left -= pgcnt; 22120Sstevel@tonic-gate ASSERT(freemem_left >= 0); 22130Sstevel@tonic-gate szc = pp->p_szc; 22140Sstevel@tonic-gate while (pp != NULL) { 22150Sstevel@tonic-gate /* 22160Sstevel@tonic-gate * pp and pp_targ were passed back as 22170Sstevel@tonic-gate * a linked list of pages. 22180Sstevel@tonic-gate * Unlink and unlock each page. 22190Sstevel@tonic-gate */ 22200Sstevel@tonic-gate tpp_targ = pp_targ; 22210Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ); 22220Sstevel@tonic-gate page_unlock(tpp_targ); 22230Sstevel@tonic-gate /* 22240Sstevel@tonic-gate * The original page is now free 22250Sstevel@tonic-gate * so remove it from the linked 22260Sstevel@tonic-gate * list and collect it. 
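					 * The bitmap update below indexes by
					 * the pfn's offset into the span:
					 * with NBPBMW bits per bitmap word
					 * (illustratively 32), an offset of
					 * 70 sets bit 70 % 32 == 6 in word
					 * 70 / 32 == 2.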
22270Sstevel@tonic-gate */ 22280Sstevel@tonic-gate tpp = pp; 22290Sstevel@tonic-gate page_sub(&pp, tpp); 22300Sstevel@tonic-gate pfn = page_pptonum(tpp); 22310Sstevel@tonic-gate collected++; 22320Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp)); 22330Sstevel@tonic-gate ASSERT(tpp->p_vnode == NULL); 22340Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(tpp)); 22350Sstevel@tonic-gate ASSERT(tpp->p_szc == szc); 22360Sstevel@tonic-gate tpp->p_szc = 0; 22370Sstevel@tonic-gate page_delete_collect(tpp, mhp); 22380Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 22390Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |= 22406242Smb158278 (1 << (bit % NBPBMW)); 22410Sstevel@tonic-gate } 22420Sstevel@tonic-gate ASSERT(pp_targ == NULL); 22430Sstevel@tonic-gate } 22440Sstevel@tonic-gate } 22450Sstevel@tonic-gate first_scan = 0; 22460Sstevel@tonic-gate if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) && 22476242Smb158278 (collected == 0)) { 22480Sstevel@tonic-gate /* 22490Sstevel@tonic-gate * This code is needed as we cannot wait 22500Sstevel@tonic-gate * for a page to be locked OR the delete to 22510Sstevel@tonic-gate * be cancelled. Also, we must delay so 22520Sstevel@tonic-gate * that other threads get a chance to run 22530Sstevel@tonic-gate * on our cpu, otherwise page locks may be 22540Sstevel@tonic-gate * held indefinitely by those threads. 22550Sstevel@tonic-gate */ 22560Sstevel@tonic-gate MDSTAT_INCR(mhp, ndelay); 22570Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 225811066Srafael.vanoni@sun.com (void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex, 225911066Srafael.vanoni@sun.com DEL_BUSY_WAIT_TICKS, TR_CLOCK_TICK); 22600Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex); 22610Sstevel@tonic-gate } 22620Sstevel@tonic-gate } 22630Sstevel@tonic-gate /* stop the dr aio cleanup thread */ 22640Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 1; 22650Sstevel@tonic-gate transit_list_collect(mhp, 0); 22660Sstevel@tonic-gate if (freemem_left != 0) { 22670Sstevel@tonic-gate /* Return any surplus. */ 22680Sstevel@tonic-gate page_create_putback(freemem_left); 22690Sstevel@tonic-gate freemem_left = 0; 22700Sstevel@tonic-gate } 22710Sstevel@tonic-gate #ifdef MEM_DEL_STATS 22720Sstevel@tonic-gate ntick_total = (uint64_t)ddi_get_lbolt() - start_total; 22730Sstevel@tonic-gate #endif /* MEM_DEL_STATS */ 22740Sstevel@tonic-gate MDSTAT_TOTAL(mhp, ntick_total); 22750Sstevel@tonic-gate MDSTAT_PRINT(mhp); 22760Sstevel@tonic-gate 22770Sstevel@tonic-gate /* 22780Sstevel@tonic-gate * If the memory delete was cancelled, exclusive-wanted bits must 2279917Selowe * be cleared. If there are retired pages being deleted, they need 2280917Selowe * to be unretired. 
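	 * mds_bitmap records the pages that were actually collected,
	 * while mds_bitmap_retired marks the retired pages that were
	 * accounted earlier and are still held EXCL-locked by this
	 * thread.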
22810Sstevel@tonic-gate */ 22820Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 22830Sstevel@tonic-gate mdsp = mdsp->mds_next) { 22840Sstevel@tonic-gate pfn_t pfn, p_end; 22850Sstevel@tonic-gate 22860Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs; 22870Sstevel@tonic-gate for (pfn = mdsp->mds_base; pfn < p_end; pfn++) { 22880Sstevel@tonic-gate page_t *pp; 22890Sstevel@tonic-gate pgcnt_t bit; 22900Sstevel@tonic-gate 22910Sstevel@tonic-gate bit = pfn - mdsp->mds_base; 22920Sstevel@tonic-gate if (mhp->mh_cancel) { 22930Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 22940Sstevel@tonic-gate if (pp != NULL) { 22950Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] & 22960Sstevel@tonic-gate (1 << (bit % NBPBMW))) == 0) { 22970Sstevel@tonic-gate page_lock_clr_exclwanted(pp); 22980Sstevel@tonic-gate } 22990Sstevel@tonic-gate } 23000Sstevel@tonic-gate } else { 23010Sstevel@tonic-gate pp = NULL; 23020Sstevel@tonic-gate } 23030Sstevel@tonic-gate if ((mdsp->mds_bitmap_retired[bit / NBPBMW] & 23040Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) { 23050Sstevel@tonic-gate /* do we already have pp? */ 23060Sstevel@tonic-gate if (pp == NULL) { 23070Sstevel@tonic-gate pp = page_numtopp_nolock(pfn); 23080Sstevel@tonic-gate } 23090Sstevel@tonic-gate ASSERT(pp != NULL); 2310917Selowe ASSERT(PP_RETIRED(pp)); 23110Sstevel@tonic-gate if (mhp->mh_cancel != 0) { 2312917Selowe page_unlock(pp); 23130Sstevel@tonic-gate /* 23140Sstevel@tonic-gate * To satisfy ASSERT below in 23150Sstevel@tonic-gate * cancel code. 23160Sstevel@tonic-gate */ 23170Sstevel@tonic-gate mhp->mh_hold_todo++; 23180Sstevel@tonic-gate } else { 23193253Smec (void) page_unretire_pp(pp, 23203253Smec PR_UNR_CLEAN); 23210Sstevel@tonic-gate } 23220Sstevel@tonic-gate } 23230Sstevel@tonic-gate } 23240Sstevel@tonic-gate } 23250Sstevel@tonic-gate /* 23260Sstevel@tonic-gate * Free retired page bitmap and collected page bitmap 23270Sstevel@tonic-gate */ 23280Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 23290Sstevel@tonic-gate mdsp = mdsp->mds_next) { 23300Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap_retired != NULL); 23310Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp)); 23320Sstevel@tonic-gate mdsp->mds_bitmap_retired = NULL; /* Paranoia. */ 23330Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap != NULL); 23340Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp)); 23350Sstevel@tonic-gate mdsp->mds_bitmap = NULL; /* Paranoia. */ 23360Sstevel@tonic-gate } 23370Sstevel@tonic-gate 23380Sstevel@tonic-gate /* wait for our dr aio cancel thread to exit */ 23390Sstevel@tonic-gate while (!(mhp->mh_aio_cleanup_done)) { 23400Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 23410Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY)); 23420Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex); 23430Sstevel@tonic-gate } 23440Sstevel@tonic-gate refused: 23450Sstevel@tonic-gate if (mhp->mh_cancel != 0) { 23460Sstevel@tonic-gate page_t *pp; 23470Sstevel@tonic-gate 23480Sstevel@tonic-gate comp_code = mhp->mh_cancel; 23490Sstevel@tonic-gate /* 23500Sstevel@tonic-gate * Go through list of deleted pages (mh_deleted) freeing 23510Sstevel@tonic-gate * them. 23520Sstevel@tonic-gate */ 23530Sstevel@tonic-gate while ((pp = mhp->mh_deleted) != NULL) { 23540Sstevel@tonic-gate mhp->mh_deleted = pp->p_next; 23550Sstevel@tonic-gate mhp->mh_hold_todo++; 23560Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23570Sstevel@tonic-gate /* Restore p_next. 
*/ 23580Sstevel@tonic-gate pp->p_next = pp->p_prev; 23590Sstevel@tonic-gate if (PP_ISFREE(pp)) { 23600Sstevel@tonic-gate cmn_err(CE_PANIC, 23610Sstevel@tonic-gate "page %p is free", 23620Sstevel@tonic-gate (void *)pp); 23630Sstevel@tonic-gate } 23640Sstevel@tonic-gate page_free(pp, 1); 23650Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 23660Sstevel@tonic-gate } 23670Sstevel@tonic-gate ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages); 23680Sstevel@tonic-gate 23690Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 23700Sstevel@tonic-gate put_availrmem(mhp->mh_vm_pages); 23710Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 23720Sstevel@tonic-gate 23730Sstevel@tonic-gate goto t_exit; 23740Sstevel@tonic-gate } 23750Sstevel@tonic-gate 23760Sstevel@tonic-gate /* 23770Sstevel@tonic-gate * All the pages are no longer in use and are exclusively locked. 23780Sstevel@tonic-gate */ 23790Sstevel@tonic-gate 23800Sstevel@tonic-gate mhp->mh_deleted = NULL; 23810Sstevel@tonic-gate 23820Sstevel@tonic-gate kphysm_del_cleanup(mhp); 23830Sstevel@tonic-gate 23846242Smb158278 /* 238510106SJason.Beloro@Sun.COM * mem_node_del_range needs to be after kphysm_del_cleanup so 23866242Smb158278 * that the mem_node_config[] will remain intact for the cleanup. 23876242Smb158278 */ 23886242Smb158278 for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL; 23896242Smb158278 mdsp = mdsp->mds_next) { 239010106SJason.Beloro@Sun.COM mem_node_del_range(mdsp->mds_base, 239110106SJason.Beloro@Sun.COM mdsp->mds_base + mdsp->mds_npgs - 1); 23926242Smb158278 } 239311185SSean.McEnroe@Sun.COM /* cleanup the page counters */ 239411185SSean.McEnroe@Sun.COM page_ctrs_cleanup(); 23956242Smb158278 23960Sstevel@tonic-gate comp_code = KPHYSM_OK; 23970Sstevel@tonic-gate 23980Sstevel@tonic-gate t_exit: 23990Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24000Sstevel@tonic-gate kphysm_setup_post_del(mhp->mh_vm_pages, 24010Sstevel@tonic-gate (comp_code == KPHYSM_OK) ? 0 : 1); 24020Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex); 24030Sstevel@tonic-gate 24040Sstevel@tonic-gate early_exit: 24050Sstevel@tonic-gate /* mhp->mh_mutex exited by CALLB_CPR_EXIT() */ 24060Sstevel@tonic-gate mhp->mh_state = MHND_DONE; 24070Sstevel@tonic-gate del_complete_funcp = mhp->mh_delete_complete; 24080Sstevel@tonic-gate del_complete_arg = mhp->mh_delete_complete_arg; 24090Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo); 24100Sstevel@tonic-gate (*del_complete_funcp)(del_complete_arg, comp_code); 24110Sstevel@tonic-gate thread_exit(); 24120Sstevel@tonic-gate /*NOTREACHED*/ 24130Sstevel@tonic-gate } 24140Sstevel@tonic-gate 24150Sstevel@tonic-gate /* 24160Sstevel@tonic-gate * Start the delete of the memory from the system. 
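 *
 * A typical caller sequence (a sketch; error handling omitted and
 * done_cb/done_arg are illustrative names):
 *
 *	memhandle_t h;
 *
 *	(void) kphysm_del_gethandle(&h);
 *	if (kphysm_del_span(h, base, npgs) == KPHYSM_OK)
 *		(void) kphysm_del_start(h, done_cb, done_arg);
 *	else
 *		(void) kphysm_del_release(h);
 *
 * done_cb() is called by delete_memory_thread() with the
 * completion code once the delete finishes or is cancelled.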
24170Sstevel@tonic-gate */ 24180Sstevel@tonic-gate int 24190Sstevel@tonic-gate kphysm_del_start( 24200Sstevel@tonic-gate memhandle_t handle, 24210Sstevel@tonic-gate void (*complete)(void *, int), 24220Sstevel@tonic-gate void *complete_arg) 24230Sstevel@tonic-gate { 24240Sstevel@tonic-gate struct mem_handle *mhp; 24250Sstevel@tonic-gate 24260Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle); 24270Sstevel@tonic-gate if (mhp == NULL) { 24280Sstevel@tonic-gate return (KPHYSM_EHANDLE); 24290Sstevel@tonic-gate } 24300Sstevel@tonic-gate switch (mhp->mh_state) { 24310Sstevel@tonic-gate case MHND_FREE: 24320Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE); 24330Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24340Sstevel@tonic-gate return (KPHYSM_EHANDLE); 24350Sstevel@tonic-gate case MHND_INIT: 24360Sstevel@tonic-gate break; 24370Sstevel@tonic-gate case MHND_STARTING: 24380Sstevel@tonic-gate case MHND_RUNNING: 24390Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24400Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 24410Sstevel@tonic-gate case MHND_DONE: 24420Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24430Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 24440Sstevel@tonic-gate case MHND_RELEASE: 24450Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24460Sstevel@tonic-gate return (KPHYSM_ESEQUENCE); 24470Sstevel@tonic-gate default: 24480Sstevel@tonic-gate #ifdef DEBUG 24490Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d", 24500Sstevel@tonic-gate (void *)mhp, mhp->mh_state); 24510Sstevel@tonic-gate #endif /* DEBUG */ 24520Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24530Sstevel@tonic-gate return (KPHYSM_EHANDLE); 24540Sstevel@tonic-gate } 24550Sstevel@tonic-gate 24560Sstevel@tonic-gate if (mhp->mh_transit.trl_spans == NULL) { 24570Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24580Sstevel@tonic-gate return (KPHYSM_ENOWORK); 24590Sstevel@tonic-gate } 24600Sstevel@tonic-gate 24610Sstevel@tonic-gate ASSERT(complete != NULL); 24620Sstevel@tonic-gate mhp->mh_delete_complete = complete; 24630Sstevel@tonic-gate mhp->mh_delete_complete_arg = complete_arg; 24640Sstevel@tonic-gate mhp->mh_state = MHND_STARTING; 24650Sstevel@tonic-gate /* 24660Sstevel@tonic-gate * Release the mutex in case thread_create sleeps. 24670Sstevel@tonic-gate */ 24680Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex); 24690Sstevel@tonic-gate 24700Sstevel@tonic-gate /* 24710Sstevel@tonic-gate * The "obvious" process for this thread is pageout (proc_pageout) 24720Sstevel@tonic-gate * but this gives the thread too much power over freemem 24730Sstevel@tonic-gate * which results in freemem starvation. 24740Sstevel@tonic-gate */ 24750Sstevel@tonic-gate (void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0, 24760Sstevel@tonic-gate TS_RUN, maxclsyspri - 1); 24770Sstevel@tonic-gate 24780Sstevel@tonic-gate return (KPHYSM_OK); 24790Sstevel@tonic-gate } 24800Sstevel@tonic-gate 24810Sstevel@tonic-gate static kmutex_t pp_dummy_lock; /* Protects init. of pp_dummy. */ 24820Sstevel@tonic-gate static caddr_t pp_dummy; 24830Sstevel@tonic-gate static pgcnt_t pp_dummy_npages; 24840Sstevel@tonic-gate static pfn_t *pp_dummy_pfn; /* Array of dummy pfns. 
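 * pp_dummy is a small run of kernel pages whose page_t images
 * are pre-initialized to the deleted state; the page_t VA range
 * of a deleted memseg is remapped read-only onto these pages so
 * that stale references see a consistent 'deleted' page rather
 * than unmapped metadata.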
 */
24850Sstevel@tonic-gate 
24860Sstevel@tonic-gate static void
24870Sstevel@tonic-gate memseg_remap_init_pages(page_t *pages, page_t *epages)
24880Sstevel@tonic-gate {
24890Sstevel@tonic-gate 	page_t *pp;
24900Sstevel@tonic-gate 
24910Sstevel@tonic-gate 	for (pp = pages; pp < epages; pp++) {
24920Sstevel@tonic-gate 		pp->p_pagenum = PFN_INVALID;	/* XXXX */
24930Sstevel@tonic-gate 		pp->p_offset = (u_offset_t)-1;
24940Sstevel@tonic-gate 		page_iolock_init(pp);
24950Sstevel@tonic-gate 		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
24960Sstevel@tonic-gate 			continue;
24970Sstevel@tonic-gate 		page_lock_delete(pp);
24980Sstevel@tonic-gate 	}
24990Sstevel@tonic-gate }
25000Sstevel@tonic-gate 
25010Sstevel@tonic-gate void
25020Sstevel@tonic-gate memseg_remap_init()
25030Sstevel@tonic-gate {
25040Sstevel@tonic-gate 	mutex_enter(&pp_dummy_lock);
25050Sstevel@tonic-gate 	if (pp_dummy == NULL) {
25060Sstevel@tonic-gate 		uint_t dpages;
25070Sstevel@tonic-gate 		int i;
25080Sstevel@tonic-gate 
25090Sstevel@tonic-gate 		/*
25100Sstevel@tonic-gate 		 * dpages starts off as the size of the structure and
25110Sstevel@tonic-gate 		 * ends up as the minimum number of pages that will
25120Sstevel@tonic-gate 		 * hold a whole number of page_t structures.
25130Sstevel@tonic-gate 		 */
25140Sstevel@tonic-gate 		dpages = sizeof (page_t);
25150Sstevel@tonic-gate 		ASSERT(dpages != 0);
25160Sstevel@tonic-gate 		ASSERT(dpages <= MMU_PAGESIZE);
25170Sstevel@tonic-gate 
25180Sstevel@tonic-gate 		while ((dpages & 1) == 0)
25190Sstevel@tonic-gate 			dpages >>= 1;
25200Sstevel@tonic-gate 
25210Sstevel@tonic-gate 		pp_dummy_npages = dpages;
25220Sstevel@tonic-gate 		/*
25230Sstevel@tonic-gate 		 * Allocate pp_dummy pages directly from static_arena,
25240Sstevel@tonic-gate 		 * since these are whole page allocations and are
25250Sstevel@tonic-gate 		 * referenced by physical address. This also has the
25260Sstevel@tonic-gate 		 * nice fringe benefit of hiding the memory from
25270Sstevel@tonic-gate 		 * ::findleaks since it doesn't deal well with allocated
25280Sstevel@tonic-gate 		 * kernel heap memory that doesn't have any mappings.
25290Sstevel@tonic-gate 		 */
25300Sstevel@tonic-gate 		pp_dummy = vmem_xalloc(static_arena, ptob(pp_dummy_npages),
25310Sstevel@tonic-gate 		    PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
25320Sstevel@tonic-gate 		bzero(pp_dummy, ptob(pp_dummy_npages));
25330Sstevel@tonic-gate 		ASSERT(((uintptr_t)pp_dummy & MMU_PAGEOFFSET) == 0);
25340Sstevel@tonic-gate 		pp_dummy_pfn = kmem_alloc(sizeof (*pp_dummy_pfn) *
25350Sstevel@tonic-gate 		    pp_dummy_npages, KM_SLEEP);
25360Sstevel@tonic-gate 		for (i = 0; i < pp_dummy_npages; i++) {
25370Sstevel@tonic-gate 			pp_dummy_pfn[i] = hat_getpfnum(kas.a_hat,
25380Sstevel@tonic-gate 			    &pp_dummy[MMU_PAGESIZE * i]);
25390Sstevel@tonic-gate 			ASSERT(pp_dummy_pfn[i] != PFN_INVALID);
25400Sstevel@tonic-gate 		}
25410Sstevel@tonic-gate 		/*
25420Sstevel@tonic-gate 		 * Initialize the page_t's to a known 'deleted' state
25430Sstevel@tonic-gate 		 * that matches the state of deleted pages.
25440Sstevel@tonic-gate 		 */
25450Sstevel@tonic-gate 		memseg_remap_init_pages((page_t *)pp_dummy,
25466242Smb158278 		    (page_t *)(pp_dummy + ptob(pp_dummy_npages)));
25470Sstevel@tonic-gate 		/* Remove kmem mappings for the pages for safety. */
25480Sstevel@tonic-gate 		hat_unload(kas.a_hat, pp_dummy, ptob(pp_dummy_npages),
25490Sstevel@tonic-gate 		    HAT_UNLOAD_UNLOCK);
25500Sstevel@tonic-gate 		/* Leave pp_dummy pointer set as flag that init is done.
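 * For reference, a worked example of the dpages computation above,
 * with illustrative numbers: if sizeof (page_t) == 120 == 8 * 15 and
 * MMU_PAGESIZE == 4096, the loop strips the factors of two leaving
 * pp_dummy_npages == 15, and 15 pages hold exactly
 * (15 * 4096) / 120 == 512 whole page_t structures, the smallest
 * whole number of pages with that property.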
 */
25510Sstevel@tonic-gate 	}
25520Sstevel@tonic-gate 	mutex_exit(&pp_dummy_lock);
25530Sstevel@tonic-gate }
25540Sstevel@tonic-gate 
255510106SJason.Beloro@Sun.COM /*
255610106SJason.Beloro@Sun.COM  * Remap a page-aligned range of page_t's to dummy pages.
255710106SJason.Beloro@Sun.COM  */
255810106SJason.Beloro@Sun.COM void
255910106SJason.Beloro@Sun.COM remap_to_dummy(caddr_t va, pgcnt_t metapgs)
25600Sstevel@tonic-gate {
256110106SJason.Beloro@Sun.COM 	int phase;
256210106SJason.Beloro@Sun.COM 
256310106SJason.Beloro@Sun.COM 	ASSERT(IS_P2ALIGNED((uint64_t)va, PAGESIZE));
256410106SJason.Beloro@Sun.COM 
256510106SJason.Beloro@Sun.COM 	/*
256610106SJason.Beloro@Sun.COM 	 * We may start remapping at a non-zero page offset
256710106SJason.Beloro@Sun.COM 	 * within the dummy pages since the low/high ends
256810106SJason.Beloro@Sun.COM 	 * of the outgoing pp's could be shared by other
256910106SJason.Beloro@Sun.COM 	 * memsegs (see memseg_remap_meta).
257010106SJason.Beloro@Sun.COM 	 */
257110106SJason.Beloro@Sun.COM 	phase = btop((uint64_t)va) % pp_dummy_npages;
257210106SJason.Beloro@Sun.COM 	ASSERT(PAGESIZE % sizeof (page_t) || phase == 0);
25730Sstevel@tonic-gate 
25740Sstevel@tonic-gate 	while (metapgs != 0) {
25750Sstevel@tonic-gate 		pgcnt_t n;
257610106SJason.Beloro@Sun.COM 		int i, j;
25770Sstevel@tonic-gate 
25780Sstevel@tonic-gate 		n = pp_dummy_npages;
25790Sstevel@tonic-gate 		if (n > metapgs)
25800Sstevel@tonic-gate 			n = metapgs;
25810Sstevel@tonic-gate 		for (i = 0; i < n; i++) {
258210106SJason.Beloro@Sun.COM 			j = (i + phase) % pp_dummy_npages;
258310106SJason.Beloro@Sun.COM 			hat_devload(kas.a_hat, va, ptob(1), pp_dummy_pfn[j],
25840Sstevel@tonic-gate 			    PROT_READ,
25850Sstevel@tonic-gate 			    HAT_LOAD | HAT_LOAD_NOCONSIST |
25860Sstevel@tonic-gate 			    HAT_LOAD_REMAP);
258710106SJason.Beloro@Sun.COM 			va += ptob(1);
25880Sstevel@tonic-gate 		}
25890Sstevel@tonic-gate 		metapgs -= n;
25900Sstevel@tonic-gate 	}
25910Sstevel@tonic-gate }
25920Sstevel@tonic-gate 
259310106SJason.Beloro@Sun.COM static void
259410106SJason.Beloro@Sun.COM memseg_remap_to_dummy(struct memseg *seg)
259510106SJason.Beloro@Sun.COM {
259610106SJason.Beloro@Sun.COM 	caddr_t pp;
259710106SJason.Beloro@Sun.COM 	pgcnt_t metapgs;
259810106SJason.Beloro@Sun.COM 
259910106SJason.Beloro@Sun.COM 	ASSERT(memseg_is_dynamic(seg));
260010106SJason.Beloro@Sun.COM 	ASSERT(pp_dummy != NULL);
260110106SJason.Beloro@Sun.COM 
260210106SJason.Beloro@Sun.COM 
260310106SJason.Beloro@Sun.COM 	if (!memseg_includes_meta(seg)) {
260410106SJason.Beloro@Sun.COM 		memseg_remap_meta(seg);
260510106SJason.Beloro@Sun.COM 		return;
260610106SJason.Beloro@Sun.COM 	}
260710106SJason.Beloro@Sun.COM 
260810106SJason.Beloro@Sun.COM 	pp = (caddr_t)seg->pages;
260910106SJason.Beloro@Sun.COM 	metapgs = seg->pages_base - memseg_get_start(seg);
261010106SJason.Beloro@Sun.COM 	ASSERT(metapgs != 0);
261110106SJason.Beloro@Sun.COM 
261210106SJason.Beloro@Sun.COM 	seg->pages_end = seg->pages_base;
261310106SJason.Beloro@Sun.COM 
261410106SJason.Beloro@Sun.COM 	remap_to_dummy(pp, metapgs);
261510106SJason.Beloro@Sun.COM }
261610106SJason.Beloro@Sun.COM 
26170Sstevel@tonic-gate /*
26180Sstevel@tonic-gate  * Transition all the deleted pages to the deleted state so that
26190Sstevel@tonic-gate  * page_lock will not wait. The page_lock_delete call will
26200Sstevel@tonic-gate  * also wake up any waiters.
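 * By the time this runs, kphysm_del_cleanup() has already unlinked
 * the memseg from the memsegs list, so no new scan can reach these
 * page_t's.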
26210Sstevel@tonic-gate  */
26220Sstevel@tonic-gate static void
26230Sstevel@tonic-gate memseg_lock_delete_all(struct memseg *seg)
26240Sstevel@tonic-gate {
26250Sstevel@tonic-gate 	page_t *pp;
26260Sstevel@tonic-gate 
26270Sstevel@tonic-gate 	for (pp = seg->pages; pp < seg->epages; pp++) {
26280Sstevel@tonic-gate 		pp->p_pagenum = PFN_INVALID;	/* XXXX */
26290Sstevel@tonic-gate 		page_lock_delete(pp);
26300Sstevel@tonic-gate 	}
26310Sstevel@tonic-gate }
26320Sstevel@tonic-gate 
26330Sstevel@tonic-gate static void
26340Sstevel@tonic-gate kphysm_del_cleanup(struct mem_handle *mhp)
26350Sstevel@tonic-gate {
26360Sstevel@tonic-gate 	struct memdelspan *mdsp;
26370Sstevel@tonic-gate 	struct memseg *seg;
26380Sstevel@tonic-gate 	struct memseg **segpp;
26390Sstevel@tonic-gate 	struct memseg *seglist;
26400Sstevel@tonic-gate 	pfn_t p_end;
26410Sstevel@tonic-gate 	uint64_t avmem;
26420Sstevel@tonic-gate 	pgcnt_t avpgs;
26430Sstevel@tonic-gate 	pgcnt_t npgs;
26440Sstevel@tonic-gate 
26450Sstevel@tonic-gate 	avpgs = mhp->mh_vm_pages;
26460Sstevel@tonic-gate 
26470Sstevel@tonic-gate 	memsegs_lock(1);
26480Sstevel@tonic-gate 
26490Sstevel@tonic-gate 	/*
26500Sstevel@tonic-gate 	 * remove from main segment list.
26510Sstevel@tonic-gate 	 */
26520Sstevel@tonic-gate 	npgs = 0;
26530Sstevel@tonic-gate 	seglist = NULL;
26540Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
26550Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
26560Sstevel@tonic-gate 		p_end = mdsp->mds_base + mdsp->mds_npgs;
26570Sstevel@tonic-gate 		for (segpp = &memsegs; (seg = *segpp) != NULL; ) {
26580Sstevel@tonic-gate 			if (seg->pages_base >= p_end ||
26590Sstevel@tonic-gate 			    seg->pages_end <= mdsp->mds_base) {
26600Sstevel@tonic-gate 				/* Span and memseg don't overlap. */
26610Sstevel@tonic-gate 				segpp = &((*segpp)->next);
26620Sstevel@tonic-gate 				continue;
26630Sstevel@tonic-gate 			}
26640Sstevel@tonic-gate 			ASSERT(seg->pages_base >= mdsp->mds_base);
26650Sstevel@tonic-gate 			ASSERT(seg->pages_end <= p_end);
26660Sstevel@tonic-gate 
26671373Skchow 			PLCNT_MODIFY_MAX(seg->pages_base,
26681373Skchow 			    seg->pages_base - seg->pages_end);
26691373Skchow 
26700Sstevel@tonic-gate 			/* Hide the memseg from future scans. */
26710Sstevel@tonic-gate 			hat_kpm_delmem_mseg_update(seg, segpp);
26720Sstevel@tonic-gate 			*segpp = seg->next;
26730Sstevel@tonic-gate 			membar_producer();	/* TODO: Needed? */
26740Sstevel@tonic-gate 			npgs += MSEG_NPAGES(seg);
26750Sstevel@tonic-gate 
26760Sstevel@tonic-gate 			/*
26770Sstevel@tonic-gate 			 * Leave the deleted segment's next pointer intact
26780Sstevel@tonic-gate 			 * in case a memsegs scanning loop is walking this
26790Sstevel@tonic-gate 			 * segment concurrently.
26800Sstevel@tonic-gate 			 */
26810Sstevel@tonic-gate 			seg->lnext = seglist;
26820Sstevel@tonic-gate 			seglist = seg;
26830Sstevel@tonic-gate 		}
26840Sstevel@tonic-gate 	}
26850Sstevel@tonic-gate 
26860Sstevel@tonic-gate 	build_pfn_hash();
26870Sstevel@tonic-gate 
26880Sstevel@tonic-gate 	ASSERT(npgs < total_pages);
26890Sstevel@tonic-gate 	total_pages -= npgs;
26900Sstevel@tonic-gate 
26910Sstevel@tonic-gate 	/*
26920Sstevel@tonic-gate 	 * Recalculate the paging parameters now total_pages has changed.
26930Sstevel@tonic-gate 	 * This will also cause the clock hands to be reset before next use.
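 * (The recomputed parameters include thresholds such as lotsfree and
 * desfree, which scale with the amount of memory.)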
26940Sstevel@tonic-gate 	 */
26950Sstevel@tonic-gate 	setupclock(1);
26960Sstevel@tonic-gate 
26970Sstevel@tonic-gate 	memsegs_unlock(1);
26980Sstevel@tonic-gate 
26990Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
27000Sstevel@tonic-gate 
27010Sstevel@tonic-gate 	while ((seg = seglist) != NULL) {
27020Sstevel@tonic-gate 		pfn_t mseg_start;
27030Sstevel@tonic-gate 		pfn_t mseg_base, mseg_end;
27040Sstevel@tonic-gate 		pgcnt_t mseg_npgs;
27050Sstevel@tonic-gate 		int mlret;
27060Sstevel@tonic-gate 
27070Sstevel@tonic-gate 		seglist = seg->lnext;
27080Sstevel@tonic-gate 
27090Sstevel@tonic-gate 		/*
27100Sstevel@tonic-gate 		 * Put the page_t's into the deleted state to stop
27110Sstevel@tonic-gate 		 * cv_wait()s on the pages. When we remap, the dummy
27120Sstevel@tonic-gate 		 * page_t's will be in the same state.
27130Sstevel@tonic-gate 		 */
27140Sstevel@tonic-gate 		memseg_lock_delete_all(seg);
27150Sstevel@tonic-gate 		/*
27160Sstevel@tonic-gate 		 * Collect up information based on pages_base and pages_end
27170Sstevel@tonic-gate 		 * early so that we can flag early that the memseg has been
27180Sstevel@tonic-gate 		 * deleted by setting pages_end == pages_base.
27190Sstevel@tonic-gate 		 */
27200Sstevel@tonic-gate 		mseg_base = seg->pages_base;
27210Sstevel@tonic-gate 		mseg_end = seg->pages_end;
27220Sstevel@tonic-gate 		mseg_npgs = MSEG_NPAGES(seg);
272310106SJason.Beloro@Sun.COM 		mseg_start = memseg_get_start(seg);
272410106SJason.Beloro@Sun.COM 
272510106SJason.Beloro@Sun.COM 		if (memseg_is_dynamic(seg)) {
27260Sstevel@tonic-gate 			/* Remap the meta data to our special dummy area. */
272710106SJason.Beloro@Sun.COM 			memseg_remap_to_dummy(seg);
27280Sstevel@tonic-gate 
27290Sstevel@tonic-gate 			mutex_enter(&memseg_lists_lock);
27300Sstevel@tonic-gate 			seg->lnext = memseg_va_avail;
27310Sstevel@tonic-gate 			memseg_va_avail = seg;
27320Sstevel@tonic-gate 			mutex_exit(&memseg_lists_lock);
27330Sstevel@tonic-gate 		} else {
27340Sstevel@tonic-gate 			/*
27350Sstevel@tonic-gate 			 * For memory whose page_ts were allocated
27360Sstevel@tonic-gate 			 * at boot, we need to find a new use for
27370Sstevel@tonic-gate 			 * the page_t memory.
27380Sstevel@tonic-gate 			 * For the moment, just leak it.
27390Sstevel@tonic-gate 			 * (It is held in the memseg_delete_junk list.)
27400Sstevel@tonic-gate 			 */
274110106SJason.Beloro@Sun.COM 			seg->pages_end = seg->pages_base;
27420Sstevel@tonic-gate 
27430Sstevel@tonic-gate 			mutex_enter(&memseg_lists_lock);
27440Sstevel@tonic-gate 			seg->lnext = memseg_delete_junk;
27450Sstevel@tonic-gate 			memseg_delete_junk = seg;
27460Sstevel@tonic-gate 			mutex_exit(&memseg_lists_lock);
27470Sstevel@tonic-gate 		}
27480Sstevel@tonic-gate 
27490Sstevel@tonic-gate 		/* Must not use seg now as it could be re-used.
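 * (a dynamic segment placed on memseg_va_avail above may be recycled
 * by memseg_reuse() as soon as memseg_lists_lock is dropped)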
 */
27500Sstevel@tonic-gate 
27510Sstevel@tonic-gate 		memlist_write_lock();
27520Sstevel@tonic-gate 
27530Sstevel@tonic-gate 		mlret = memlist_delete_span(
27540Sstevel@tonic-gate 		    (uint64_t)(mseg_base) << PAGESHIFT,
27550Sstevel@tonic-gate 		    (uint64_t)(mseg_npgs) << PAGESHIFT,
27560Sstevel@tonic-gate 		    &phys_avail);
27570Sstevel@tonic-gate 		ASSERT(mlret == MEML_SPANOP_OK);
27580Sstevel@tonic-gate 
27590Sstevel@tonic-gate 		mlret = memlist_delete_span(
27600Sstevel@tonic-gate 		    (uint64_t)(mseg_start) << PAGESHIFT,
27610Sstevel@tonic-gate 		    (uint64_t)(mseg_end - mseg_start) <<
27620Sstevel@tonic-gate 		    PAGESHIFT,
27630Sstevel@tonic-gate 		    &phys_install);
27640Sstevel@tonic-gate 		ASSERT(mlret == MEML_SPANOP_OK);
27650Sstevel@tonic-gate 		phys_install_has_changed();
27660Sstevel@tonic-gate 
27670Sstevel@tonic-gate 		memlist_write_unlock();
27680Sstevel@tonic-gate 	}
27690Sstevel@tonic-gate 
27700Sstevel@tonic-gate 	memlist_read_lock();
27710Sstevel@tonic-gate 	installed_top_size(phys_install, &physmax, &physinstalled);
27720Sstevel@tonic-gate 	memlist_read_unlock();
27730Sstevel@tonic-gate 
27740Sstevel@tonic-gate 	mutex_enter(&freemem_lock);
27750Sstevel@tonic-gate 	maxmem -= avpgs;
27760Sstevel@tonic-gate 	physmem -= avpgs;
27770Sstevel@tonic-gate 	/* availrmem is adjusted during the delete. */
27780Sstevel@tonic-gate 	availrmem_initial -= avpgs;
27790Sstevel@tonic-gate 
27800Sstevel@tonic-gate 	mutex_exit(&freemem_lock);
27810Sstevel@tonic-gate 
27820Sstevel@tonic-gate 	dump_resize();
27830Sstevel@tonic-gate 
27840Sstevel@tonic-gate 	cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK "
27850Sstevel@tonic-gate 	    "(0x%" PRIx64 ")\n",
27860Sstevel@tonic-gate 	    physinstalled << (PAGESHIFT - 10),
27870Sstevel@tonic-gate 	    (uint64_t)physinstalled << PAGESHIFT);
27880Sstevel@tonic-gate 
27890Sstevel@tonic-gate 	avmem = (uint64_t)freemem << PAGESHIFT;
27900Sstevel@tonic-gate 	cmn_err(CE_CONT, "?kphysm_delete: "
27910Sstevel@tonic-gate 	    "avail mem = %" PRId64 "\n", avmem);
27920Sstevel@tonic-gate 
27930Sstevel@tonic-gate 	/*
27940Sstevel@tonic-gate 	 * Update lgroup generation number on single lgroup systems
27950Sstevel@tonic-gate 	 */
27960Sstevel@tonic-gate 	if (nlgrps == 1)
27970Sstevel@tonic-gate 		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);
27980Sstevel@tonic-gate 
27990Sstevel@tonic-gate 	/* Successfully deleted system memory */
28000Sstevel@tonic-gate 	mutex_enter(&mhp->mh_mutex);
28010Sstevel@tonic-gate }
28020Sstevel@tonic-gate 
28030Sstevel@tonic-gate static uint_t mdel_nullvp_waiter;
28040Sstevel@tonic-gate 
28050Sstevel@tonic-gate static void
28060Sstevel@tonic-gate page_delete_collect(
28070Sstevel@tonic-gate 	page_t *pp,
28080Sstevel@tonic-gate 	struct mem_handle *mhp)
28090Sstevel@tonic-gate {
28100Sstevel@tonic-gate 	if (pp->p_vnode) {
28110Sstevel@tonic-gate 		page_hashout(pp, (kmutex_t *)NULL);
28120Sstevel@tonic-gate 		/* do not do PP_SETAGED(pp); */
28130Sstevel@tonic-gate 	} else {
28140Sstevel@tonic-gate 		kmutex_t *sep;
28150Sstevel@tonic-gate 
28160Sstevel@tonic-gate 		sep = page_se_mutex(pp);
28170Sstevel@tonic-gate 		mutex_enter(sep);
28180Sstevel@tonic-gate 		if (CV_HAS_WAITERS(&pp->p_cv)) {
28190Sstevel@tonic-gate 			mdel_nullvp_waiter++;
28200Sstevel@tonic-gate 			cv_broadcast(&pp->p_cv);
28210Sstevel@tonic-gate 		}
28220Sstevel@tonic-gate 		mutex_exit(sep);
28230Sstevel@tonic-gate 	}
28240Sstevel@tonic-gate 	ASSERT(pp->p_next == pp->p_prev);
28250Sstevel@tonic-gate 	ASSERT(pp->p_next == NULL || pp->p_next == pp);
28260Sstevel@tonic-gate 	pp->p_next = mhp->mh_deleted;
28270Sstevel@tonic-gate 	mhp->mh_deleted = pp;
28280Sstevel@tonic-gate 	ASSERT(mhp->mh_hold_todo != 0);
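	/* One fewer page remains to be captured for this delete. */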
28290Sstevel@tonic-gate 	mhp->mh_hold_todo--;
28300Sstevel@tonic-gate }
28310Sstevel@tonic-gate 
28320Sstevel@tonic-gate static void
28330Sstevel@tonic-gate transit_list_collect(struct mem_handle *mhp, int v)
28340Sstevel@tonic-gate {
28350Sstevel@tonic-gate 	struct transit_list_head *trh;
28360Sstevel@tonic-gate 
28370Sstevel@tonic-gate 	trh = &transit_list_head;
28380Sstevel@tonic-gate 	mutex_enter(&trh->trh_lock);
28390Sstevel@tonic-gate 	mhp->mh_transit.trl_collect = v;
28400Sstevel@tonic-gate 	mutex_exit(&trh->trh_lock);
28410Sstevel@tonic-gate }
28420Sstevel@tonic-gate 
28430Sstevel@tonic-gate static void
28440Sstevel@tonic-gate transit_list_insert(struct transit_list *tlp)
28450Sstevel@tonic-gate {
28460Sstevel@tonic-gate 	struct transit_list_head *trh;
28470Sstevel@tonic-gate 
28480Sstevel@tonic-gate 	trh = &transit_list_head;
28490Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&trh->trh_lock));
28500Sstevel@tonic-gate 	tlp->trl_next = trh->trh_head;
28510Sstevel@tonic-gate 	trh->trh_head = tlp;
28520Sstevel@tonic-gate }
28530Sstevel@tonic-gate 
28540Sstevel@tonic-gate static void
28550Sstevel@tonic-gate transit_list_remove(struct transit_list *tlp)
28560Sstevel@tonic-gate {
28570Sstevel@tonic-gate 	struct transit_list_head *trh;
28580Sstevel@tonic-gate 	struct transit_list **tlpp;
28590Sstevel@tonic-gate 
28600Sstevel@tonic-gate 	trh = &transit_list_head;
28610Sstevel@tonic-gate 	tlpp = &trh->trh_head;
28620Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&trh->trh_lock));
28630Sstevel@tonic-gate 	while (*tlpp != NULL && *tlpp != tlp)
28640Sstevel@tonic-gate 		tlpp = &(*tlpp)->trl_next;
28650Sstevel@tonic-gate 	ASSERT(*tlpp != NULL);
28660Sstevel@tonic-gate 	if (*tlpp == tlp)
28670Sstevel@tonic-gate 		*tlpp = tlp->trl_next;
28680Sstevel@tonic-gate 	tlp->trl_next = NULL;
28690Sstevel@tonic-gate }
28700Sstevel@tonic-gate 
28710Sstevel@tonic-gate static struct transit_list *
28720Sstevel@tonic-gate pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum)
28730Sstevel@tonic-gate {
28740Sstevel@tonic-gate 	struct transit_list *tlp;
28750Sstevel@tonic-gate 
28760Sstevel@tonic-gate 	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
28770Sstevel@tonic-gate 		struct memdelspan *mdsp;
28780Sstevel@tonic-gate 
28790Sstevel@tonic-gate 		for (mdsp = tlp->trl_spans; mdsp != NULL;
28800Sstevel@tonic-gate 		    mdsp = mdsp->mds_next) {
28810Sstevel@tonic-gate 			if (pfnum >= mdsp->mds_base &&
28820Sstevel@tonic-gate 			    pfnum < (mdsp->mds_base + mdsp->mds_npgs)) {
28830Sstevel@tonic-gate 				return (tlp);
28840Sstevel@tonic-gate 			}
28850Sstevel@tonic-gate 		}
28860Sstevel@tonic-gate 	}
28870Sstevel@tonic-gate 	return (NULL);
28880Sstevel@tonic-gate }
28890Sstevel@tonic-gate 
28900Sstevel@tonic-gate int
28910Sstevel@tonic-gate pfn_is_being_deleted(pfn_t pfnum)
28920Sstevel@tonic-gate {
28930Sstevel@tonic-gate 	struct transit_list_head *trh;
28940Sstevel@tonic-gate 	struct transit_list *tlp;
28950Sstevel@tonic-gate 	int ret;
28960Sstevel@tonic-gate 
28970Sstevel@tonic-gate 	trh = &transit_list_head;
28980Sstevel@tonic-gate 	if (trh->trh_head == NULL)
28990Sstevel@tonic-gate 		return (0);
29000Sstevel@tonic-gate 
29010Sstevel@tonic-gate 	mutex_enter(&trh->trh_lock);
29020Sstevel@tonic-gate 	tlp = pfnum_to_transit_list(trh, pfnum);
29030Sstevel@tonic-gate 	ret = (tlp != NULL && tlp->trl_collect);
29040Sstevel@tonic-gate 	mutex_exit(&trh->trh_lock);
29050Sstevel@tonic-gate 
29060Sstevel@tonic-gate 	return (ret);
29070Sstevel@tonic-gate }
29080Sstevel@tonic-gate 
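/*
 * Explanatory note: pfn_is_being_deleted() tests trh_head without
 * taking trh_lock so that the common case, no memory delete in
 * progress, stays cheap; the lock is taken only when a transit list
 * actually exists.
 */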
29090Sstevel@tonic-gate #ifdef MEM_DEL_STATS
29100Sstevel@tonic-gate extern int hz;
29110Sstevel@tonic-gate static void
29120Sstevel@tonic-gate mem_del_stat_print_func(struct mem_handle *mhp)
29130Sstevel@tonic-gate {
29140Sstevel@tonic-gate 	uint64_t tmp;
29150Sstevel@tonic-gate 
29160Sstevel@tonic-gate 	if (mem_del_stat_print) {
29170Sstevel@tonic-gate 		printf("memory delete loop %x/%x, statistics%s\n",
29180Sstevel@tonic-gate 		    (uint_t)mhp->mh_transit.trl_spans->mds_base,
29190Sstevel@tonic-gate 		    (uint_t)mhp->mh_transit.trl_spans->mds_npgs,
29200Sstevel@tonic-gate 		    (mhp->mh_cancel ? " (cancelled)" : ""));
29210Sstevel@tonic-gate 		printf("\t%8u nloop\n", mhp->mh_delstat.nloop);
29220Sstevel@tonic-gate 		printf("\t%8u need_free\n", mhp->mh_delstat.need_free);
29230Sstevel@tonic-gate 		printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop);
29240Sstevel@tonic-gate 		printf("\t%8u free_low\n", mhp->mh_delstat.free_low);
29250Sstevel@tonic-gate 		printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed);
29260Sstevel@tonic-gate 		printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck);
29270Sstevel@tonic-gate 		printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget);
29280Sstevel@tonic-gate 		printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail);
29290Sstevel@tonic-gate 		printf("\t%8u nfree\n", mhp->mh_delstat.nfree);
29300Sstevel@tonic-gate 		printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc);
29310Sstevel@tonic-gate 		printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail);
29320Sstevel@tonic-gate 		printf("\t%8u already_done\n", mhp->mh_delstat.already_done);
29330Sstevel@tonic-gate 		printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree);
29340Sstevel@tonic-gate 		printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked);
29350Sstevel@tonic-gate 		printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc);
29360Sstevel@tonic-gate 		printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl);
29370Sstevel@tonic-gate 		printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc);
29380Sstevel@tonic-gate 		printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy);
29390Sstevel@tonic-gate 		printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage);
29400Sstevel@tonic-gate 		printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim);
29410Sstevel@tonic-gate 		printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay);
29420Sstevel@tonic-gate 		printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail);
29430Sstevel@tonic-gate 		printf("\t%8u retired\n", mhp->mh_delstat.retired);
29440Sstevel@tonic-gate 		printf("\t%8u toxic\n", mhp->mh_delstat.toxic);
29450Sstevel@tonic-gate 		printf("\t%8u failing\n", mhp->mh_delstat.failing);
29460Sstevel@tonic-gate 		printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic);
29470Sstevel@tonic-gate 		printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic);
29480Sstevel@tonic-gate 		printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail);
29490Sstevel@tonic-gate 		printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail);
29500Sstevel@tonic-gate 		tmp = mhp->mh_delstat.nticks_total / hz;  /* seconds */
29510Sstevel@tonic-gate 		printf(
29520Sstevel@tonic-gate 		    "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n",
29530Sstevel@tonic-gate 		    mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60);
29540Sstevel@tonic-gate 
29550Sstevel@tonic-gate 		tmp = mhp->mh_delstat.nticks_pgrp / hz;  /* seconds */
29560Sstevel@tonic-gate 		printf(
29570Sstevel@tonic-gate 		    "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n",
29580Sstevel@tonic-gate 		    mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60);
29590Sstevel@tonic-gate 	}
29600Sstevel@tonic-gate }
29610Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
29620Sstevel@tonic-gate 
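/*
 * Table of registered {vector, arg} callback pairs. Empty slots are
 * reused on registration; nmemcallbacks only shrinks when the last
 * slot empties. All access is serialized by mem_callback_rwlock (see
 * the hedged registration sketch after kphysm_setup_func_unregister()).
 */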
29630Sstevel@tonic-gate struct mem_callback {
29640Sstevel@tonic-gate 	kphysm_setup_vector_t *vec;
29650Sstevel@tonic-gate 	void *arg;
29660Sstevel@tonic-gate };
29670Sstevel@tonic-gate 
29680Sstevel@tonic-gate #define	NMEMCALLBACKS	100
29690Sstevel@tonic-gate 
29700Sstevel@tonic-gate static struct mem_callback mem_callbacks[NMEMCALLBACKS];
29710Sstevel@tonic-gate static uint_t nmemcallbacks;
29720Sstevel@tonic-gate static krwlock_t mem_callback_rwlock;
29730Sstevel@tonic-gate 
29740Sstevel@tonic-gate int
29750Sstevel@tonic-gate kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
29760Sstevel@tonic-gate {
29770Sstevel@tonic-gate 	uint_t i, found;
29780Sstevel@tonic-gate 
29790Sstevel@tonic-gate 	/*
29800Sstevel@tonic-gate 	 * This test will become more complicated when the version must
29810Sstevel@tonic-gate 	 * change.
29820Sstevel@tonic-gate 	 */
29830Sstevel@tonic-gate 	if (vec->version != KPHYSM_SETUP_VECTOR_VERSION)
29840Sstevel@tonic-gate 		return (EINVAL);
29850Sstevel@tonic-gate 
29860Sstevel@tonic-gate 	if (vec->post_add == NULL || vec->pre_del == NULL ||
29870Sstevel@tonic-gate 	    vec->post_del == NULL)
29880Sstevel@tonic-gate 		return (EINVAL);
29890Sstevel@tonic-gate 
29900Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_WRITER);
29910Sstevel@tonic-gate 	for (i = 0, found = 0; i < nmemcallbacks; i++) {
29920Sstevel@tonic-gate 		if (mem_callbacks[i].vec == NULL && found == 0)
29930Sstevel@tonic-gate 			found = i + 1;
29940Sstevel@tonic-gate 		if (mem_callbacks[i].vec == vec &&
29950Sstevel@tonic-gate 		    mem_callbacks[i].arg == arg) {
29960Sstevel@tonic-gate #ifdef DEBUG
29970Sstevel@tonic-gate 			/* Catch this in DEBUG kernels. */
29980Sstevel@tonic-gate 			cmn_err(CE_WARN, "kphysm_setup_func_register"
29990Sstevel@tonic-gate 			    "(0x%p, 0x%p) duplicate registration from 0x%p",
30000Sstevel@tonic-gate 			    (void *)vec, arg, (void *)caller());
30010Sstevel@tonic-gate #endif /* DEBUG */
30020Sstevel@tonic-gate 			rw_exit(&mem_callback_rwlock);
30030Sstevel@tonic-gate 			return (EEXIST);
30040Sstevel@tonic-gate 		}
30050Sstevel@tonic-gate 	}
30060Sstevel@tonic-gate 	if (found != 0) {
30070Sstevel@tonic-gate 		i = found - 1;
30080Sstevel@tonic-gate 	} else {
30090Sstevel@tonic-gate 		ASSERT(nmemcallbacks < NMEMCALLBACKS);
30100Sstevel@tonic-gate 		if (nmemcallbacks == NMEMCALLBACKS) {
30110Sstevel@tonic-gate 			rw_exit(&mem_callback_rwlock);
30120Sstevel@tonic-gate 			return (ENOMEM);
30130Sstevel@tonic-gate 		}
30140Sstevel@tonic-gate 		i = nmemcallbacks++;
30150Sstevel@tonic-gate 	}
30160Sstevel@tonic-gate 	mem_callbacks[i].vec = vec;
30170Sstevel@tonic-gate 	mem_callbacks[i].arg = arg;
30180Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30190Sstevel@tonic-gate 	return (0);
30200Sstevel@tonic-gate }
30210Sstevel@tonic-gate 
30220Sstevel@tonic-gate void
30230Sstevel@tonic-gate kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
30240Sstevel@tonic-gate {
30250Sstevel@tonic-gate 	uint_t i;
30260Sstevel@tonic-gate 
30270Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_WRITER);
30280Sstevel@tonic-gate 	for (i = 0; i < nmemcallbacks; i++) {
30290Sstevel@tonic-gate 		if (mem_callbacks[i].vec == vec &&
30300Sstevel@tonic-gate 		    mem_callbacks[i].arg == arg) {
30310Sstevel@tonic-gate 			mem_callbacks[i].vec = NULL;
30320Sstevel@tonic-gate 			mem_callbacks[i].arg = NULL;
30330Sstevel@tonic-gate 			if (i == (nmemcallbacks - 1))
30340Sstevel@tonic-gate 				nmemcallbacks--;
30350Sstevel@tonic-gate 			break;
30360Sstevel@tonic-gate 		}
30370Sstevel@tonic-gate 	}
30380Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30390Sstevel@tonic-gate }
30400Sstevel@tonic-gate 
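/*
 * Hedged registration sketch (the client function and variable names
 * are illustrative only; the vector fields are assumed to follow the
 * checks in kphysm_setup_func_register() above):
 *
 *	static void my_post_add(void *arg, pgcnt_t delta);
 *	static int my_pre_del(void *arg, pgcnt_t delta);
 *	static void my_post_del(void *arg, pgcnt_t delta, int cancelled);
 *
 *	static kphysm_setup_vector_t my_vec = {
 *		KPHYSM_SETUP_VECTOR_VERSION,
 *		my_post_add,
 *		my_pre_del,
 *		my_post_del
 *	};
 *
 *	(void) kphysm_setup_func_register(&my_vec, my_arg);
 *	...
 *	kphysm_setup_func_unregister(&my_vec, my_arg);
 *
 * A nonzero return from any pre_del callback is treated as a refusal
 * of the delete; the post_del call then reports cancelled != 0.
 */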
30410Sstevel@tonic-gate static void
30420Sstevel@tonic-gate kphysm_setup_post_add(pgcnt_t delta_pages)
30430Sstevel@tonic-gate {
30440Sstevel@tonic-gate 	uint_t i;
30450Sstevel@tonic-gate 
30460Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_READER);
30470Sstevel@tonic-gate 	for (i = 0; i < nmemcallbacks; i++) {
30480Sstevel@tonic-gate 		if (mem_callbacks[i].vec != NULL) {
30490Sstevel@tonic-gate 			(*mem_callbacks[i].vec->post_add)
30500Sstevel@tonic-gate 			    (mem_callbacks[i].arg, delta_pages);
30510Sstevel@tonic-gate 		}
30520Sstevel@tonic-gate 	}
30530Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30540Sstevel@tonic-gate }
30550Sstevel@tonic-gate 
30560Sstevel@tonic-gate /*
30570Sstevel@tonic-gate  * Note the locking between pre_del and post_del: The reader lock is held
30580Sstevel@tonic-gate  * between the two calls to stop the set of functions from changing.
30590Sstevel@tonic-gate  */
30600Sstevel@tonic-gate 
30610Sstevel@tonic-gate static int
30620Sstevel@tonic-gate kphysm_setup_pre_del(pgcnt_t delta_pages)
30630Sstevel@tonic-gate {
30640Sstevel@tonic-gate 	uint_t i;
30650Sstevel@tonic-gate 	int ret;
30660Sstevel@tonic-gate 	int aret;
30670Sstevel@tonic-gate 
30680Sstevel@tonic-gate 	ret = 0;
30690Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_READER);
30700Sstevel@tonic-gate 	for (i = 0; i < nmemcallbacks; i++) {
30710Sstevel@tonic-gate 		if (mem_callbacks[i].vec != NULL) {
30720Sstevel@tonic-gate 			aret = (*mem_callbacks[i].vec->pre_del)
30730Sstevel@tonic-gate 			    (mem_callbacks[i].arg, delta_pages);
30740Sstevel@tonic-gate 			ret |= aret;
30750Sstevel@tonic-gate 		}
30760Sstevel@tonic-gate 	}
30770Sstevel@tonic-gate 
30780Sstevel@tonic-gate 	return (ret);
30790Sstevel@tonic-gate }
30800Sstevel@tonic-gate 
30810Sstevel@tonic-gate static void
30820Sstevel@tonic-gate kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled)
30830Sstevel@tonic-gate {
30840Sstevel@tonic-gate 	uint_t i;
30850Sstevel@tonic-gate 
30860Sstevel@tonic-gate 	for (i = 0; i < nmemcallbacks; i++) {
30870Sstevel@tonic-gate 		if (mem_callbacks[i].vec != NULL) {
30880Sstevel@tonic-gate 			(*mem_callbacks[i].vec->post_del)
30890Sstevel@tonic-gate 			    (mem_callbacks[i].arg, delta_pages, cancelled);
30900Sstevel@tonic-gate 		}
30910Sstevel@tonic-gate 	}
30920Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30930Sstevel@tonic-gate }
30940Sstevel@tonic-gate 
30950Sstevel@tonic-gate static int
30960Sstevel@tonic-gate kphysm_split_memseg(
30970Sstevel@tonic-gate 	pfn_t base,
30980Sstevel@tonic-gate 	pgcnt_t npgs)
30990Sstevel@tonic-gate {
31000Sstevel@tonic-gate 	struct memseg *seg;
31010Sstevel@tonic-gate 	struct memseg **segpp;
31020Sstevel@tonic-gate 	pgcnt_t size_low, size_high;
31030Sstevel@tonic-gate 	struct memseg *seg_low, *seg_mid, *seg_high;
31040Sstevel@tonic-gate 
31050Sstevel@tonic-gate 	/*
31060Sstevel@tonic-gate 	 * Lock the memsegs list against other updates now
31070Sstevel@tonic-gate 	 */
31080Sstevel@tonic-gate 	memsegs_lock(1);
31090Sstevel@tonic-gate 
31100Sstevel@tonic-gate 	/*
31110Sstevel@tonic-gate 	 * Find boot time memseg that wholly covers this area.
31120Sstevel@tonic-gate 	 */
31130Sstevel@tonic-gate 
31140Sstevel@tonic-gate 	/* First find the memseg with page 'base' in it.
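 * A linear scan is sufficient: the memsegs list is short and cannot
 * change while the write lock taken by memsegs_lock(1) is held.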
 */
31150Sstevel@tonic-gate 	for (segpp = &memsegs; (seg = *segpp) != NULL;
31160Sstevel@tonic-gate 	    segpp = &((*segpp)->next)) {
31170Sstevel@tonic-gate 		if (base >= seg->pages_base && base < seg->pages_end)
31180Sstevel@tonic-gate 			break;
31190Sstevel@tonic-gate 	}
31200Sstevel@tonic-gate 	if (seg == NULL) {
31210Sstevel@tonic-gate 		memsegs_unlock(1);
31220Sstevel@tonic-gate 		return (0);
31230Sstevel@tonic-gate 	}
312410106SJason.Beloro@Sun.COM 	if (memseg_includes_meta(seg)) {
31250Sstevel@tonic-gate 		memsegs_unlock(1);
31260Sstevel@tonic-gate 		return (0);
31270Sstevel@tonic-gate 	}
31280Sstevel@tonic-gate 	if ((base + npgs) > seg->pages_end) {
31290Sstevel@tonic-gate 		memsegs_unlock(1);
31300Sstevel@tonic-gate 		return (0);
31310Sstevel@tonic-gate 	}
31320Sstevel@tonic-gate 
31330Sstevel@tonic-gate 	/*
31340Sstevel@tonic-gate 	 * Work out the size of the two segments that will
31350Sstevel@tonic-gate 	 * surround the new segment, one for low address
31360Sstevel@tonic-gate 	 * and one for high.
31370Sstevel@tonic-gate 	 */
31380Sstevel@tonic-gate 	ASSERT(base >= seg->pages_base);
31390Sstevel@tonic-gate 	size_low = base - seg->pages_base;
31400Sstevel@tonic-gate 	ASSERT(seg->pages_end >= (base + npgs));
31410Sstevel@tonic-gate 	size_high = seg->pages_end - (base + npgs);
31420Sstevel@tonic-gate 
31430Sstevel@tonic-gate 	/*
31440Sstevel@tonic-gate 	 * Sanity check.
31450Sstevel@tonic-gate 	 */
31460Sstevel@tonic-gate 	if ((size_low + size_high) == 0) {
31470Sstevel@tonic-gate 		memsegs_unlock(1);
31480Sstevel@tonic-gate 		return (0);
31490Sstevel@tonic-gate 	}
31500Sstevel@tonic-gate 
31510Sstevel@tonic-gate 	/*
31520Sstevel@tonic-gate 	 * Allocate the new structures. The old memseg will not be freed
31530Sstevel@tonic-gate 	 * as there may be a reference to it.
31540Sstevel@tonic-gate 	 */
31550Sstevel@tonic-gate 	seg_low = NULL;
31560Sstevel@tonic-gate 	seg_high = NULL;
31570Sstevel@tonic-gate 
315810106SJason.Beloro@Sun.COM 	if (size_low != 0)
315910106SJason.Beloro@Sun.COM 		seg_low = memseg_alloc();
316010106SJason.Beloro@Sun.COM 
316110106SJason.Beloro@Sun.COM 	seg_mid = memseg_alloc();
316210106SJason.Beloro@Sun.COM 
316310106SJason.Beloro@Sun.COM 	if (size_high != 0)
316410106SJason.Beloro@Sun.COM 		seg_high = memseg_alloc();
31650Sstevel@tonic-gate 
31660Sstevel@tonic-gate 	/*
31670Sstevel@tonic-gate 	 * All allocation done now.
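 *
 * The resulting arrangement (ascending pfns; seg_low and/or seg_high
 * are omitted when their size is zero):
 *
 *	seg->pages_base                              seg->pages_end
 *	+--------------+---------------------+---------------------+
 *	|   seg_low    |       seg_mid       |       seg_high      |
 *	|  (size_low)  |        (npgs)       |     (size_high)     |
 *	+--------------+---------------------+---------------------+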
31680Sstevel@tonic-gate */ 31690Sstevel@tonic-gate if (size_low != 0) { 31700Sstevel@tonic-gate seg_low->pages = seg->pages; 31710Sstevel@tonic-gate seg_low->epages = seg_low->pages + size_low; 31720Sstevel@tonic-gate seg_low->pages_base = seg->pages_base; 31730Sstevel@tonic-gate seg_low->pages_end = seg_low->pages_base + size_low; 31740Sstevel@tonic-gate seg_low->next = seg_mid; 317510106SJason.Beloro@Sun.COM seg_low->msegflags = seg->msegflags; 31760Sstevel@tonic-gate } 31770Sstevel@tonic-gate if (size_high != 0) { 31780Sstevel@tonic-gate seg_high->pages = seg->epages - size_high; 31790Sstevel@tonic-gate seg_high->epages = seg_high->pages + size_high; 31800Sstevel@tonic-gate seg_high->pages_base = seg->pages_end - size_high; 31810Sstevel@tonic-gate seg_high->pages_end = seg_high->pages_base + size_high; 31820Sstevel@tonic-gate seg_high->next = seg->next; 318310106SJason.Beloro@Sun.COM seg_high->msegflags = seg->msegflags; 31840Sstevel@tonic-gate } 31850Sstevel@tonic-gate 31860Sstevel@tonic-gate seg_mid->pages = seg->pages + size_low; 31870Sstevel@tonic-gate seg_mid->pages_base = seg->pages_base + size_low; 31880Sstevel@tonic-gate seg_mid->epages = seg->epages - size_high; 31890Sstevel@tonic-gate seg_mid->pages_end = seg->pages_end - size_high; 31900Sstevel@tonic-gate seg_mid->next = (seg_high != NULL) ? seg_high : seg->next; 319110106SJason.Beloro@Sun.COM seg_mid->msegflags = seg->msegflags; 31920Sstevel@tonic-gate 31930Sstevel@tonic-gate /* 31940Sstevel@tonic-gate * Update hat_kpm specific info of all involved memsegs and 31950Sstevel@tonic-gate * allow hat_kpm specific global chain updates. 31960Sstevel@tonic-gate */ 31970Sstevel@tonic-gate hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high); 31980Sstevel@tonic-gate 31990Sstevel@tonic-gate /* 32000Sstevel@tonic-gate * At this point we have two equivalent memseg sub-chains, 32010Sstevel@tonic-gate * seg and seg_low/seg_mid/seg_high, which both chain on to 32020Sstevel@tonic-gate * the same place in the global chain. By re-writing the pointer 32030Sstevel@tonic-gate * in the previous element we switch atomically from using the old 32040Sstevel@tonic-gate * (seg) to the new. 32050Sstevel@tonic-gate */ 32060Sstevel@tonic-gate *segpp = (seg_low != NULL) ? seg_low : seg_mid; 32070Sstevel@tonic-gate 32080Sstevel@tonic-gate membar_enter(); 32090Sstevel@tonic-gate 32100Sstevel@tonic-gate build_pfn_hash(); 32110Sstevel@tonic-gate memsegs_unlock(1); 32120Sstevel@tonic-gate 32130Sstevel@tonic-gate /* 32140Sstevel@tonic-gate * We leave the old segment, 'seg', intact as there may be 32150Sstevel@tonic-gate * references to it. Also, as the value of total_pages has not 32160Sstevel@tonic-gate * changed and the memsegs list is effectively the same when 32170Sstevel@tonic-gate * accessed via the old or the new pointer, we do not have to 32180Sstevel@tonic-gate * cause pageout_scanner() to re-evaluate its hand pointers. 32190Sstevel@tonic-gate * 32200Sstevel@tonic-gate * We currently do not re-use or reclaim the page_t memory. 32210Sstevel@tonic-gate * If we do, then this may have to change. 32220Sstevel@tonic-gate */ 32230Sstevel@tonic-gate 32240Sstevel@tonic-gate mutex_enter(&memseg_lists_lock); 32250Sstevel@tonic-gate seg->lnext = memseg_edit_junk; 32260Sstevel@tonic-gate memseg_edit_junk = seg; 32270Sstevel@tonic-gate mutex_exit(&memseg_lists_lock); 32280Sstevel@tonic-gate 32290Sstevel@tonic-gate return (1); 32300Sstevel@tonic-gate } 32310Sstevel@tonic-gate 32320Sstevel@tonic-gate /* 32330Sstevel@tonic-gate * The sfmmu hat layer (e.g.) 
32320Sstevel@tonic-gate /*
32330Sstevel@tonic-gate  * The hat layer (e.g. sfmmu) accesses some parts of the memseg
32340Sstevel@tonic-gate  * structure using physical addresses. Therefore a kmem_cache is
32350Sstevel@tonic-gate  * used with KMC_NOHASH to avoid page crossings within a memseg
32360Sstevel@tonic-gate  * structure. KMC_NOHASH requires that no external (outside of
32370Sstevel@tonic-gate  * slab) information is allowed. This, in turn, implies that the
32380Sstevel@tonic-gate  * cache's slabsize must be exactly a single page, since per-slab
32390Sstevel@tonic-gate  * information (e.g. the freelist for the slab) is kept at the
32400Sstevel@tonic-gate  * end of the slab, where it is easy to locate. This should be
32410Sstevel@tonic-gate  * changed when a more obvious kmem_cache interface/flag becomes
32420Sstevel@tonic-gate  * available.
32430Sstevel@tonic-gate  */
32440Sstevel@tonic-gate void
32450Sstevel@tonic-gate mem_config_init()
32460Sstevel@tonic-gate {
32470Sstevel@tonic-gate 	memseg_cache = kmem_cache_create("memseg_cache", sizeof (struct memseg),
32486242Smb158278 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
32490Sstevel@tonic-gate }
325010106SJason.Beloro@Sun.COM 
325110106SJason.Beloro@Sun.COM struct memseg *
325210106SJason.Beloro@Sun.COM memseg_alloc()
325310106SJason.Beloro@Sun.COM {
325410106SJason.Beloro@Sun.COM 	struct memseg *seg;
325510106SJason.Beloro@Sun.COM 
325610106SJason.Beloro@Sun.COM 	seg = kmem_cache_alloc(memseg_cache, KM_SLEEP);
325710106SJason.Beloro@Sun.COM 	bzero(seg, sizeof (struct memseg));
325810106SJason.Beloro@Sun.COM 
325910106SJason.Beloro@Sun.COM 	return (seg);
326010106SJason.Beloro@Sun.COM }
326110106SJason.Beloro@Sun.COM 
326210106SJason.Beloro@Sun.COM /*
326310106SJason.Beloro@Sun.COM  * Return whether the page_t memory for this memseg
326410106SJason.Beloro@Sun.COM  * is included in the memseg itself.
326510106SJason.Beloro@Sun.COM  */
326610106SJason.Beloro@Sun.COM static int
326710106SJason.Beloro@Sun.COM memseg_includes_meta(struct memseg *seg)
326810106SJason.Beloro@Sun.COM {
326910106SJason.Beloro@Sun.COM 	return (seg->msegflags & MEMSEG_META_INCL);
327010106SJason.Beloro@Sun.COM }
327110106SJason.Beloro@Sun.COM 
327210106SJason.Beloro@Sun.COM pfn_t
327310106SJason.Beloro@Sun.COM memseg_get_start(struct memseg *seg)
327410106SJason.Beloro@Sun.COM {
327510106SJason.Beloro@Sun.COM 	pfn_t pt_start;
327610106SJason.Beloro@Sun.COM 
327710106SJason.Beloro@Sun.COM 	if (memseg_includes_meta(seg)) {
327810106SJason.Beloro@Sun.COM 		pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages);
327910106SJason.Beloro@Sun.COM 
328010106SJason.Beloro@Sun.COM 		/* Meta data is required to be at the beginning */
328110106SJason.Beloro@Sun.COM 		ASSERT(pt_start < seg->pages_base);
328210106SJason.Beloro@Sun.COM 	} else
328310106SJason.Beloro@Sun.COM 		pt_start = seg->pages_base;
328410106SJason.Beloro@Sun.COM 
328510106SJason.Beloro@Sun.COM 	return (pt_start);
328610106SJason.Beloro@Sun.COM }
328710106SJason.Beloro@Sun.COM 
328810106SJason.Beloro@Sun.COM /*
328910106SJason.Beloro@Sun.COM  * Invalidate memseg pointers in cpu private vm data caches.
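 * The CPUs are paused while the pointers are cleared so that no CPU
 * can be dereferencing its cached vc_pnum_memseg/vc_pnext_memseg
 * values at the moment the memseg they refer to is deleted.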
329010106SJason.Beloro@Sun.COM */ 329110106SJason.Beloro@Sun.COM static void 329210106SJason.Beloro@Sun.COM memseg_cpu_vm_flush() 329310106SJason.Beloro@Sun.COM { 329410106SJason.Beloro@Sun.COM cpu_t *cp; 329510106SJason.Beloro@Sun.COM vm_cpu_data_t *vc; 329610106SJason.Beloro@Sun.COM 329710106SJason.Beloro@Sun.COM mutex_enter(&cpu_lock); 329810106SJason.Beloro@Sun.COM pause_cpus(NULL); 329910106SJason.Beloro@Sun.COM 330010106SJason.Beloro@Sun.COM cp = cpu_list; 330110106SJason.Beloro@Sun.COM do { 330210106SJason.Beloro@Sun.COM vc = cp->cpu_vm_data; 330310106SJason.Beloro@Sun.COM vc->vc_pnum_memseg = NULL; 330410106SJason.Beloro@Sun.COM vc->vc_pnext_memseg = NULL; 330510106SJason.Beloro@Sun.COM 330610106SJason.Beloro@Sun.COM } while ((cp = cp->cpu_next) != cpu_list); 330710106SJason.Beloro@Sun.COM 330810106SJason.Beloro@Sun.COM start_cpus(); 330910106SJason.Beloro@Sun.COM mutex_exit(&cpu_lock); 331010106SJason.Beloro@Sun.COM } 3311