/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * PMEM - Direct mapping physical memory pages to userland process
 *
 * Provide functions used for directly (w/o occupying kernel virtual address
 * space) allocating and exporting physical memory pages to userland.
 */

#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/sunddi.h>
#include <sys/ddidevmap.h>
#include <sys/vnode.h>
#include <sys/sysmacros.h>
#include <sys/project.h>
#include <vm/seg_dev.h>
#include <sys/pmem.h>
#include <vm/hat_i86.h>
#include <sys/task.h>
#include <sys/sdt.h>

/*
 * The routines in this file allocate memory which will be accessed through
 * the AGP GART hardware.  The GART is programmed with the PFNs for this
 * memory, and the only mechanism for removing these entries is by an
 * explicit process operation (ioctl/close of the driver, or process exit).
 * As such, the pages need to remain locked to ensure that they won't be
 * relocated or paged out.
 *
 * To prevent these locked pages from getting in the way of page
 * coalescing, we try to allocate large pages from the system, and carve
 * them up to satisfy pmem allocation requests.  This will keep the locked
 * pages within a constrained area of physical memory, limiting the number
 * of large pages that would be pinned by our locked pages.  This is, of
 * course, another take on the infamous kernel cage, and it has many of the
 * downsides of the original cage.  It also interferes with system-wide
 * resource management decisions, as it maintains its own pool of unused
 * pages which can't be easily reclaimed and used during low-memory
 * situations.
 *
 * The right solution is for pmem to register a callback that the VM system
 * could call, which would temporarily remove any GART entries for pages
 * that were being relocated.  This would let us leave the pages unlocked,
 * which would remove the need for using large pages, which would simplify
 * this code a great deal.  Unfortunately, the support for these callbacks
 * only exists on some SPARC platforms right now.
 *
 * Note that this is the *only* reason that large pages are used here.  The
 * GART can't perform large-page translations, and the code appropriately
 * falls back to using small pages if page_create_va_large() fails.
 */

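/*
 * Illustrative call sequence, sketched from a hypothetical client driver
 * (the "xx" names and XX_NPAGES are invented for this sketch and are not
 * part of the interface): allocate in an ioctl handler, export the pages
 * from the devmap(9E) entry point, fetch the PFNs to program into the
 * GART, and free everything again at close time.
 *
 *	devmap_pmem_cookie_t cookie;
 *	pfn_t pfns[XX_NPAGES];
 *
 *	if (devmap_pmem_alloc(ptob(XX_NPAGES), PMEM_SLEEP, &cookie) !=
 *	    DDI_SUCCESS)
 *		return (ENOMEM);
 *	(void) devmap_pmem_setup(dhc, dip, &xx_callbackops, cookie, 0,
 *	    ptob(XX_NPAGES), PROT_ALL, IOMEM_DATA_UC_WR_COMBINE,
 *	    &xx_accattr);
 *	(void) devmap_pmem_getpfns(cookie, 0, XX_NPAGES, pfns);
 *	...
 *	devmap_pmem_free(cookie);
 */
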
#define	HOLD_DHP_LOCK(dhp)	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
			{ mutex_enter(&dhp->dh_lock); }

#define	RELE_DHP_LOCK(dhp)	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
			{ mutex_exit(&dhp->dh_lock); }

#define	FROM_LPG(pp) (pp->p_szc != 0)
#define	PFIND(pp) (page_pptonum(pp) & (pmem_pgcnt - 1))

/*
 * Structs and static variables used for pmem only.
 */
typedef struct pmem_lpg {
	page_t	*pl_pp;		/* start pp */
	ulong_t	*pl_bitmap;	/* allocation status for each page */
	ushort_t pl_pfree;	/* this large page might be fully freed */
	struct pmem_lpg *pl_next;
	struct pmem_lpg *pl_prev;
} pmem_lpg_t;

static size_t	pmem_lpgsize;	/* the size of one large page */
static pgcnt_t	pmem_pgcnt;	/* the number of small pages in a large page */
static uint_t	pmem_lszc;	/* page size code of the large page */
/* The segment to be associated with all the allocated pages. */
static struct seg	pmem_seg;
/* Fully occupied large pages allocated for pmem. */
static pmem_lpg_t *pmem_occ_lpgs;
/* Memory pool to store residual small pages from large pages. */
static page_t	*pmem_mpool = NULL;
/* Number of small pages currently residing in pmem_mpool. */
static pgcnt_t	pmem_nmpages = 0;
/* To protect pmem_nmpages, pmem_mpool and pmem_occ_lpgs. */
kmutex_t	pmem_mutex;

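/*
 * Sizing sketch (illustrative, assuming the common x86 configuration of
 * 4K base pages and a 2M large page, i.e. pmem_pgcnt == 512): each
 * pmem_lpg_t then tracks 512 small pages, and pl_bitmap occupies
 * BT_BITOUL(512) == 8 ulongs (64 bytes) on a 64-bit kernel.  A set bit
 * means the corresponding page sits free in pmem_mpool; a clear bit means
 * it has been handed out in some pmem cookie.
 */
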
static int lpg_isfree(pmem_lpg_t *);
static void pmem_lpg_sub(pmem_lpg_t **, pmem_lpg_t *);
static void pmem_lpg_concat(pmem_lpg_t **, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_get(pmem_lpg_t *, page_t *, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_alloc(uint_t);
static void pmem_lpg_free(pmem_lpg_t **, pmem_lpg_t *);
static void lpg_free(page_t *spp);
static pgcnt_t mpool_break(page_t **, pgcnt_t);
static void mpool_append(page_t **, pgcnt_t);
static void lpp_break(page_t **, pgcnt_t, pgcnt_t, pmem_lpg_t *);
static void lpp_free(page_t *, pgcnt_t, pmem_lpg_t **);
static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **,
    vnode_t *, u_offset_t *, uint_t);
static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *);
static void tlist_out(page_t *, pgcnt_t);
static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t);
static int pmem_lock(pgcnt_t, kproject_t **);

/*
 * Called by driver devmap routine to pass physical memory mapping info to
 * seg_dev framework, used only for physical memory allocated from
 * devmap_pmem_alloc().
 */
/* ARGSUSED */
int
devmap_pmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
    struct devmap_callback_ctl *callbackops, devmap_pmem_cookie_t cookie,
    offset_t off, size_t len, uint_t maxprot, uint_t flags,
    ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * First, check whether this function has already been called for
	 * this dhp.
	 */
	if (dhp->dh_flags & DEVMAP_SETUP_DONE)
		return (DDI_FAILURE);

	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported. Note that only
	 * uncacheable or write-combining attributes are permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	if (flags & DEVMAP_MAPPING_INVALID) {
		/*
		 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
		 * remap permission.
		 */
		if (!(flags & DEVMAP_ALLOW_REMAP))
			return (DDI_FAILURE);
	} else {
		dhp->dh_pcookie = (devmap_pmem_cookie_t)pcp;
		/* dh_roff is the offset inside the dh_pcookie. */
		dhp->dh_roff = ptob(btop(off));
		/* Set the cache attributes correctly */
		i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);
	}

	dhp->dh_cookie = DEVMAP_PMEM_COOKIE;
	dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
	dhp->dh_len = ptob(btopr(len));

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);

	if (callbackops != NULL) {
		bcopy(callbackops, &dhp->dh_callbackops,
		    sizeof (struct devmap_callback_ctl));
	}

	/*
	 * Initialize dh_lock if we want to do remap.
	 */
	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
		mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
		dhp->dh_flags |= DEVMAP_LOCK_INITED;
	}

	dhp->dh_flags |= DEVMAP_SETUP_DONE;

	return (DDI_SUCCESS);
}

/*
 * Replace existing mapping using a new cookie, mainly gets called when doing
 * fork(). Should be called in associated devmap_dup(9E).
 */
/* ARGSUSED */
int
devmap_pmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
    devmap_pmem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
    uint_t flags, ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	/*
	 * Return failure if setup has not been done or no remap permission
	 * has been granted during the setup.
	 */
	if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
	    (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
		return (DDI_FAILURE);

	/* No flags supported for remap yet. */
	if (flags != 0)
		return (DDI_FAILURE);

	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported. Note that only
	 * uncacheable or write-combining attributes are permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	HOLD_DHP_LOCK(dhp);
	/*
	 * Unload the old mapping of pages related to this dhp, so the next
	 * fault will set up the new mappings. It is segdev_faultpage() that
	 * calls hat_devload() to establish the mapping. Do this while holding
	 * the dhp lock so other faults don't reestablish the mappings.
	 */
	hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
	    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);

	/* Set the cache attributes correctly */
	i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);

	dhp->dh_pcookie = cookie;
	dhp->dh_roff = ptob(btop(off));
	dhp->dh_len = ptob(btopr(len));

	/* Clear the large page size flag. */
	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
	RELE_DHP_LOCK(dhp);
	return (DDI_SUCCESS);
}

/*
 * Directly (i.e., without occupying kernel virtual address space) allocate
 * 'npages' physical memory pages for exporting to user land. The allocated
 * page_t pointers will be recorded in cookie.
 */
int
devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep)
{
	u_offset_t pmem_off = 0;
	page_t *pp = NULL;
	page_t *lpp = NULL;
	page_t *tlist = NULL;
	pgcnt_t i = 0;
	pgcnt_t rpages = 0;
	pgcnt_t lpages = 0;
	pgcnt_t tpages = 0;
	pgcnt_t npages = btopr(size);
	pmem_lpg_t *plp = NULL;
	struct devmap_pmem_cookie *pcp;
	uint_t reserved = 0;
	uint_t locked = 0;
	uint_t pflags, kflags;

	*cookiep = NULL;

	/*
	 * A number larger than this will cause page_create_va() to loop
	 * infinitely.
	 */
	if (npages == 0 || npages >= total_pages / 2)
		return (DDI_FAILURE);
	if ((flags & (PMEM_SLEEP | PMEM_NOSLEEP)) == 0)
		return (DDI_FAILURE);
	pflags = flags & PMEM_NOSLEEP ? PG_EXCL : PG_WAIT;
	kflags = flags & PMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP;

	/* Allocate pmem cookie. */
	if (pmem_cookie_alloc(&pcp, npages, kflags) == DDI_FAILURE)
		return (DDI_FAILURE);
	pcp->dp_npages = npages;

	/*
	 * See if the requested memory can be locked. Currently we do resource
	 * controls on the project level only.
	 */
	if (pmem_lock(npages, &(pcp->dp_projp)) == DDI_FAILURE)
		goto alloc_fail;
	locked = 1;

	/*
	 * First, grab as many pages as possible from pmem_mpool. If the pages
	 * in pmem_mpool are enough for this request, we are done.
	 */
	mutex_enter(&pmem_mutex);
	tpages = mpool_break(&tlist, npages);
	/* IOlock and hashin them into the new offset. */
	if (tpages)
		tlist_in(tlist, tpages, pcp->dp_vnp, &pmem_off);
	mutex_exit(&pmem_mutex);

	if (tpages == npages)
		goto done;

	rpages = npages - tpages;
	/* Quit now if memory cannot be reserved. */
	if (!page_resv(rpages, kflags))
		goto alloc_fail;
	reserved = 1;

	/* Try to allocate large pages first to decrease fragmentation. */
	i = (rpages + (pmem_pgcnt - 1)) / pmem_pgcnt;
	if (lpp_create(&lpp, i, &lpages, &plp, pcp->dp_vnp, &pmem_off,
	    kflags) == DDI_FAILURE)
		goto alloc_fail;
	ASSERT(lpages == 0 ? lpp == NULL : 1);

	/*
	 * If the large pages hold more pages than the request needs, put
	 * the residual pages into pmem_mpool.
	 */
	if (lpages >= rpages) {
		lpp_break(&lpp, lpages, lpages - rpages, plp);
		goto done;
	}

	/* Allocate small pages if lpp+tlist cannot satisfy the request. */
	i = rpages - lpages;
	if ((pp = page_create_va(pcp->dp_vnp, pmem_off, ptob(i),
	    pflags, &pmem_seg, (caddr_t)(uintptr_t)pmem_off)) == NULL)
		goto alloc_fail;

done:
	page_list_concat(&tlist, &lpp);
	page_list_concat(&tlist, &pp);
	/* Set those small pages from large pages as allocated. */
	mutex_enter(&pmem_mutex);
	pmem_lpg_concat(&pmem_occ_lpgs, &plp);
	mutex_exit(&pmem_mutex);

	/*
	 * Now tlist holds all the pages for this cookie. Record these pages
	 * in pmem cookie.
	 */
	for (pp = tlist, i = 0; i < npages; i++) {
		pcp->dp_pparray[i] = pp;
		page_io_unlock(pp);
		pp = pp->p_next;
		page_sub(&tlist, pp->p_prev);
	}
	ASSERT(tlist == NULL);
	*cookiep = (devmap_pmem_cookie_t)pcp;

	return (DDI_SUCCESS);

alloc_fail:
	DTRACE_PROBE(pmem__alloc__fail);
	/* Free large pages and the associated allocation records. */
	if (lpp)
		lpp_free(lpp, lpages / pmem_pgcnt, &plp);
	if (reserved == 1)
		page_unresv(rpages);
	/* Put those pages in tlist back into pmem_mpool. */
	if (tpages != 0) {
		mutex_enter(&pmem_mutex);
		/* IOunlock, hashout and update the allocation records. */
		tlist_out(tlist, tpages);
		mpool_append(&tlist, tpages);
		mutex_exit(&pmem_mutex);
	}
	if (locked == 1)
		i_ddi_decr_locked_memory(NULL, NULL, pcp->dp_projp, NULL,
		    ptob(pcp->dp_npages));
	/* Free the pmem cookie. */
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
	return (DDI_FAILURE);
}

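/*
 * Worked example of the allocation path above (illustrative numbers,
 * assuming pmem_pgcnt == 512 and an empty pmem_mpool): a request for 600
 * pages finds nothing in the pool (tpages == 0), reserves rpages == 600
 * pages, allocates (600 + 511) / 512 == 2 large pages (1024 small pages),
 * and then lpp_break() returns the residual 1024 - 600 == 424 pages to
 * pmem_mpool for later requests.
 */
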
/*
 * Free all small pages inside cookie, and return pages from large pages into
 * mpool; if all the pages from one large page are in mpool, free it as a
 * whole.
 */
void
devmap_pmem_free(devmap_pmem_cookie_t cookie)
{
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t i;
	pgcnt_t tpages = 0;
	page_t *pp;
	pmem_lpg_t *pl1, *plp;
	pmem_lpg_t *pf_lpgs = NULL;
	uint_t npls = 0;
	pmem_lpg_t *last_pl = NULL;
	pmem_lpg_t *plast_pl = NULL;

	ASSERT(pcp);
	mutex_enter(&pmem_mutex);
	/* Free small pages and return them to memory pool. */
	for (i = pcp->dp_npages; i > 0; i--) {
		pp = pcp->dp_pparray[i - 1];
		page_hashout(pp, NULL);
		/*
		 * Remove the mapping of this single page, this mapping is
		 * created using hat_devload() in segdev_faultpage().
		 */
		(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
		if (!FROM_LPG(pp)) {
			/* Normal small page. */
			page_free(pp, 1);
			page_unresv(1);
		} else {
			/* Small page from large pages. */
			plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
			if (plp && !(plp->pl_pfree)) {
				/*
				 * Move this record to pf_lpgs list, this large
				 * page may be able to be freed as a whole.
				 */
				pmem_lpg_sub(&pmem_occ_lpgs, plp);
				pmem_lpg_concat(&pf_lpgs, &plp);
				plp->pl_pfree = 1;
				npls++;
				last_pl = NULL;
			} else {
				/* Search in pf_lpgs list. */
				plp = pmem_lpg_get(pf_lpgs, pp, &plast_pl);
			}
			ASSERT(plp);
			/* Mark this page as free. */
			BT_SET(plp->pl_bitmap, PFIND(pp));
			/* Record this page in pmem_mpool. */
			mpool_append(&pp, 1);
		}
	}

	/*
	 * Find the large pages whose pages have all been freed, remove them
	 * from the plp list, and free them along with the associated pmem_lpg
	 * structs.
	 */
	for (plp = pf_lpgs; npls != 0; npls--) {
		pl1 = plp;
		plp = plp->pl_next;
		if (lpg_isfree(pl1)) {
			/*
			 * Get one free large page. Find all pages in this
			 * large page and remove them from pmem_mpool.
			 */
			lpg_free(pl1->pl_pp);
			/* Remove associated allocation records. */
			pmem_lpg_sub(&pf_lpgs, pl1);
			pmem_lpg_free(&pf_lpgs, pl1);
			tpages -= pmem_pgcnt;
		} else
			pl1->pl_pfree = 0;
	}
	/* Update allocation records accordingly. */
	pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs);
	mutex_exit(&pmem_mutex);

	i_ddi_decr_locked_memory(NULL, NULL, (kproject_t *)pcp->dp_projp, NULL,
	    ptob(pcp->dp_npages));
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
}

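/*
 * Teardown ordering sketch (the "xx" names are hypothetical): since
 * devmap_pmem_free() sends pages back to the free list or to pmem_mpool,
 * a driver must remove its GART translations for these PFNs first, e.g.:
 *
 *	xx_gart_unmap(softc);
 *	devmap_pmem_free(softc->xx_pcookie);
 */
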
/*
 * Extract page frame numbers from a specified range in a cookie.
 */
int
devmap_pmem_getpfns(devmap_pmem_cookie_t cookie, uint_t start, pgcnt_t npages,
    pfn_t *pfnarray)
{
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t i;

	if (pcp == NULL || start + npages > pcp->dp_npages)
		return (DDI_FAILURE);

	for (i = start; i < start + npages; i++)
		pfnarray[i - start] = pcp->dp_pparray[i]->p_pagenum;
	return (DDI_SUCCESS);
}

void
pmem_init()
{
	mutex_init(&pmem_mutex, NULL, MUTEX_DEFAULT, NULL);
	pmem_lszc = MIN(1, page_num_pagesizes() - 1);
	pmem_lpgsize = page_get_pagesize(pmem_lszc);
	pmem_pgcnt = pmem_lpgsize >> PAGESHIFT;
	bzero(&pmem_seg, sizeof (struct seg));
	pmem_seg.s_as = &kas;
}

/* Allocate kernel memory for one pmem cookie with n pages. */
static int
pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags)
{
	struct devmap_pmem_cookie *pcp;

	if ((*pcpp = kmem_zalloc(sizeof (struct devmap_pmem_cookie),
	    kflags)) == NULL)
		return (DDI_FAILURE);
	pcp = *pcpp;
	if ((pcp->dp_vnp =
	    kmem_zalloc(sizeof (vnode_t), kflags)) == NULL) {
		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
		return (DDI_FAILURE);
	}
	if ((pcp->dp_pparray =
	    kmem_zalloc(n * sizeof (page_t *), kflags)) == NULL) {
		kmem_free(pcp->dp_vnp, sizeof (vnode_t));
		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

/* Try to lock down the resource for n pages for the current project. */
static int
pmem_lock(pgcnt_t n, kproject_t **prjpp)
{
	mutex_enter(&curproc->p_lock);
	if (i_ddi_incr_locked_memory(curproc, NULL, NULL, NULL,
	    ptob(n)) != 0) {
		mutex_exit(&curproc->p_lock);
		return (DDI_FAILURE);
	}
	/* Store this project in cookie for later lock/unlock. */
	*prjpp = curproc->p_task->tk_proj;
	mutex_exit(&curproc->p_lock);
	return (DDI_SUCCESS);
}

/* Check whether all the pages in a large page are freed. */
static int
lpg_isfree(pmem_lpg_t *plp)
{
	uint_t i;

	for (i = 0; i < BT_BITOUL(pmem_pgcnt); i++)
		if (plp->pl_bitmap[i] != BT_ULMAXMASK)
			return (0);
	/* All 1 means all pages are freed. */
	return (1);
}

/*
 * Use pp to find the associated large page allocation record, searching the
 * splp linked list with *last as a heuristic starting pointer. Return NULL
 * if not found.
 */
static pmem_lpg_t *
pmem_lpg_get(pmem_lpg_t *splp, page_t *pp, pmem_lpg_t **last)
{
	pmem_lpg_t *plp;
	pgcnt_t root_pfn;

	ASSERT(pp);
	if (splp == NULL)
		return (NULL);
	root_pfn = page_pptonum(pp) & ~(pmem_pgcnt - 1);

	/* Try last winner first. */
	if (*last && root_pfn == page_pptonum((*last)->pl_pp))
		goto pl_found;

	/* Else search the whole pmem_lpg list. */
	for (plp = splp; root_pfn != page_pptonum(plp->pl_pp); ) {
		plp = plp->pl_next;
		if (plp == splp) {
			plp = NULL;
			break;
		}
		ASSERT(plp->pl_pp);
	}

	*last = plp;

pl_found:
	return (*last);
}

/*
 * Remove one pmem_lpg plp from the oplpp list.
 */
static void
pmem_lpg_sub(pmem_lpg_t **oplpp, pmem_lpg_t *plp)
{
	if (*oplpp == plp)
		*oplpp = plp->pl_next;		/* go to next pmem_lpg */

	if (*oplpp == plp)
		*oplpp = NULL;			/* pmem_lpg list is gone */
	else {
		plp->pl_prev->pl_next = plp->pl_next;
		plp->pl_next->pl_prev = plp->pl_prev;
	}
	plp->pl_prev = plp->pl_next = plp;	/* make plp a list of one */
}

/*
 * Concatenate the pmem_lpg list nplpp onto the end of list plpp.
 */
static void
pmem_lpg_concat(pmem_lpg_t **plpp, pmem_lpg_t **nplpp)
{
	pmem_lpg_t *s1p, *s2p, *e1p, *e2p;

	if (*nplpp == NULL) {
		return;
	}
	if (*plpp == NULL) {
		*plpp = *nplpp;
		return;
	}
	s1p = *plpp;
	e1p = s1p->pl_prev;
	s2p = *nplpp;
	e2p = s2p->pl_prev;
	s1p->pl_prev = e2p;
	e2p->pl_next = s1p;
	e1p->pl_next = s2p;
	s2p->pl_prev = e1p;
}

/*
 * Allocate and initialize the allocation record of one large page; the
 * initial state is 'allocated' (all bits clear).
 */
static pmem_lpg_t *
pmem_lpg_alloc(uint_t kflags)
{
	pmem_lpg_t *plp;

	ASSERT(pmem_pgcnt % BT_NBIPUL == 0);
	plp = kmem_zalloc(sizeof (pmem_lpg_t), kflags);
	if (plp == NULL)
		return (NULL);
	plp->pl_bitmap = kmem_zalloc(BT_SIZEOFMAP(pmem_pgcnt), kflags);
	if (plp->pl_bitmap == NULL) {
		kmem_free(plp, sizeof (*plp));
		return (NULL);
	}
	plp->pl_next = plp->pl_prev = plp;
	return (plp);
}

/* Free one allocation record pointed to by plp. */
static void
pmem_lpg_free(pmem_lpg_t **headp, pmem_lpg_t *plp)
{
	if (*headp == plp)
		*headp = plp->pl_next;		/* go to next pmem_lpg_t */

	if (*headp == plp)
		*headp = NULL;			/* this list is gone */
	else {
		plp->pl_prev->pl_next = plp->pl_next;
		plp->pl_next->pl_prev = plp->pl_prev;
	}
	kmem_free(plp->pl_bitmap, BT_SIZEOFMAP(pmem_pgcnt));
	kmem_free(plp, sizeof (*plp));
}

/* Free one large page headed by spp from pmem_mpool. */
static void
lpg_free(page_t *spp)
{
	page_t *pp1 = spp;
	uint_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (i = 0; i < pmem_pgcnt; i++) {
		/* Break pp1 from pmem_mpool. */
		page_sub(&pmem_mpool, pp1);
		pp1++;
	}
	/* Free pages in this large page. */
	page_free_pages(spp);
	page_unresv(pmem_pgcnt);
	pmem_nmpages -= pmem_pgcnt;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
}

/* Put n pages in the *ppp list back into pmem_mpool. */
static void
mpool_append(page_t **ppp, pgcnt_t n)
{
	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Put back pages. */
	page_list_concat(&pmem_mpool, ppp);
	pmem_nmpages += n;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
}

/*
 * Try to grab MIN(pmem_nmpages, n) pages from pmem_mpool, put them into the
 * *ppp list, and return the number of grabbed pages.
 */
static pgcnt_t
mpool_break(page_t **ppp, pgcnt_t n)
{
	pgcnt_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Grab the pages. */
	i = MIN(pmem_nmpages, n);
	*ppp = pmem_mpool;
	page_list_break(ppp, &pmem_mpool, i);
	pmem_nmpages -= i;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
	return (i);
}

/*
 * Create n large pages; on return, *lpages and *plpp hold the number of
 * small pages created and the list of allocation records, respectively.
 */
static int
lpp_create(page_t **lppp, pgcnt_t n, pgcnt_t *lpages, pmem_lpg_t **plpp,
    vnode_t *vnp, u_offset_t *offp, uint_t kflags)
{
	pgcnt_t i;
	pmem_lpg_t *plp;
	page_t *pp;

	for (i = 0, *lpages = 0; i < n; i++) {
		/* Allocate one large page each time. */
		pp = page_create_va_large(vnp, *offp, pmem_lpgsize,
		    PG_EXCL, &pmem_seg, (caddr_t)(uintptr_t)*offp, NULL);
		if (pp == NULL)
			break;
		*offp += pmem_lpgsize;
		page_list_concat(lppp, &pp);
		*lpages += pmem_pgcnt;
		/* Add one allocation record for this large page. */
		if ((plp = pmem_lpg_alloc(kflags)) == NULL)
			return (DDI_FAILURE);
		plp->pl_pp = pp;
		pmem_lpg_concat(plpp, &plp);
	}
	return (DDI_SUCCESS);
}

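/*
 * Note that lpp_create() may fail part way through, returning DDI_FAILURE
 * with the large pages created so far still linked on *lppp and *plpp; the
 * caller cleans these up, as the alloc_fail path in devmap_pmem_alloc()
 * does via lpp_free().
 */
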
/*
 * Break the last r small pages from the large page list *lppp (which holds
 * a total of n small pages) and put them into pmem_mpool.
 */
static void
lpp_break(page_t **lppp, pgcnt_t n, pgcnt_t r, pmem_lpg_t *oplp)
{
	page_t *pp, *pp1;
	pgcnt_t i;
	pmem_lpg_t *plp;

	if (r == 0)
		return;
	ASSERT(*lppp != NULL && r < pmem_pgcnt);
	page_list_break(lppp, &pp, n - r);

	/* The residual should reside in the last large page. */
	plp = oplp->pl_prev;
	/* IOunlock and hashout the residual pages. */
	for (pp1 = pp, i = 0; i < r; i++) {
		page_io_unlock(pp1);
		page_hashout(pp1, NULL);
		/* Mark this page as free. */
		BT_SET(plp->pl_bitmap, PFIND(pp1));
		pp1 = pp1->p_next;
	}
	ASSERT(pp1 == pp);
	/* Put these residual pages into memory pool. */
	mutex_enter(&pmem_mutex);
	mpool_append(&pp, r);
	mutex_exit(&pmem_mutex);
}

/* Free large pages in lpp and the associated allocation records in plp. */
static void
lpp_free(page_t *lpp, pgcnt_t lpgs, pmem_lpg_t **plpp)
{
	pgcnt_t i, j;
	page_t *pp = lpp, *pp1;
	pmem_lpg_t *plp1, *plp2;

	for (i = 0; i < lpgs; i++) {
		for (j = 0; j < pmem_pgcnt; j++) {
			/* IO unlock and hashout this small page. */
			page_io_unlock(pp);
			page_hashout(pp, NULL);
			pp1 = pp->p_next;
			pp->p_prev = pp->p_next = pp;
			pp = pp1;
		}
		/* Free one large page at one time. */
		page_free_pages(lpp);
		lpp = pp;
	}
	/* Free associated pmem large page allocation records. */
	for (plp1 = *plpp; *plpp; plp1 = plp2) {
		plp2 = plp1->pl_next;
		pmem_lpg_free(plpp, plp1);
	}
}

/*
 * IOlock and hashin all pages in tlist, associating them with vnode *pvnp
 * and an offset starting at *poffp. Update allocation records accordingly
 * at the same time.
 */
8470Sstevel@tonic-gate */ 8480Sstevel@tonic-gate static void 8490Sstevel@tonic-gate tlist_in(page_t *tlist, pgcnt_t tpages, vnode_t *pvnp, u_offset_t *poffp) 8500Sstevel@tonic-gate { 8510Sstevel@tonic-gate page_t *pp; 8520Sstevel@tonic-gate pgcnt_t i = 0; 8530Sstevel@tonic-gate pmem_lpg_t *plp, *last_pl = NULL; 8540Sstevel@tonic-gate 8550Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 8560Sstevel@tonic-gate for (pp = tlist; i < tpages; i++) { 8570Sstevel@tonic-gate ASSERT(FROM_LPG(pp)); 8580Sstevel@tonic-gate page_io_lock(pp); 8590Sstevel@tonic-gate (void) page_hashin(pp, pvnp, *poffp, NULL); 8600Sstevel@tonic-gate plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl); 8610Sstevel@tonic-gate /* Mark this page as allocated. */ 8620Sstevel@tonic-gate BT_CLEAR(plp->pl_bitmap, PFIND(pp)); 8630Sstevel@tonic-gate *poffp += PAGESIZE; 8640Sstevel@tonic-gate pp = pp->p_next; 8650Sstevel@tonic-gate } 8660Sstevel@tonic-gate ASSERT(pp == tlist); 8670Sstevel@tonic-gate } 8680Sstevel@tonic-gate 8690Sstevel@tonic-gate /* 8700Sstevel@tonic-gate * IOunlock and hashout all pages in tlist, update allocation records 8710Sstevel@tonic-gate * accordingly at the same time. 8720Sstevel@tonic-gate */ 8730Sstevel@tonic-gate static void 8740Sstevel@tonic-gate tlist_out(page_t *tlist, pgcnt_t tpages) 8750Sstevel@tonic-gate { 8760Sstevel@tonic-gate page_t *pp; 8770Sstevel@tonic-gate pgcnt_t i = 0; 8780Sstevel@tonic-gate pmem_lpg_t *plp, *last_pl = NULL; 8790Sstevel@tonic-gate 8800Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 8810Sstevel@tonic-gate for (pp = tlist; i < tpages; i++) { 8820Sstevel@tonic-gate ASSERT(FROM_LPG(pp)); 8830Sstevel@tonic-gate page_io_unlock(pp); 8840Sstevel@tonic-gate page_hashout(pp, NULL); 8850Sstevel@tonic-gate plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl); 8860Sstevel@tonic-gate /* Mark this page as free. */ 8870Sstevel@tonic-gate BT_SET(plp->pl_bitmap, PFIND(pp)); 8880Sstevel@tonic-gate pp = pp->p_next; 8890Sstevel@tonic-gate } 8900Sstevel@tonic-gate ASSERT(pp == tlist); 8910Sstevel@tonic-gate } 892