1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate /* 30*0Sstevel@tonic-gate * PMEM - Direct mapping physical memory pages to userland process 31*0Sstevel@tonic-gate * 32*0Sstevel@tonic-gate * Provide functions used for directly (w/o occupying kernel virtual address 33*0Sstevel@tonic-gate * space) allocating and exporting physical memory pages to userland. 34*0Sstevel@tonic-gate */ 35*0Sstevel@tonic-gate 36*0Sstevel@tonic-gate #include <sys/types.h> 37*0Sstevel@tonic-gate #include <sys/mutex.h> 38*0Sstevel@tonic-gate #include <sys/sunddi.h> 39*0Sstevel@tonic-gate #include <sys/ddidevmap.h> 40*0Sstevel@tonic-gate #include <sys/vnode.h> 41*0Sstevel@tonic-gate #include <sys/sysmacros.h> 42*0Sstevel@tonic-gate #include <sys/project.h> 43*0Sstevel@tonic-gate #include <vm/seg_dev.h> 44*0Sstevel@tonic-gate #include <sys/pmem.h> 45*0Sstevel@tonic-gate #include <vm/hat_i86.h> 46*0Sstevel@tonic-gate #include <sys/task.h> 47*0Sstevel@tonic-gate #include <sys/sdt.h> 48*0Sstevel@tonic-gate 49*0Sstevel@tonic-gate /* 50*0Sstevel@tonic-gate * The routines in this file allocate memory which will be accessed through 51*0Sstevel@tonic-gate * the AGP GART hardware. The GART is programmed with the PFNs for this 52*0Sstevel@tonic-gate * memory, and the only mechanism for removing these entries is by an 53*0Sstevel@tonic-gate * explicit process operation (ioctl/close of the driver, or process exit). 54*0Sstevel@tonic-gate * As such, the pages need to remain locked to ensure that they won't be 55*0Sstevel@tonic-gate * relocated or paged out. 56*0Sstevel@tonic-gate * 57*0Sstevel@tonic-gate * To prevent these locked pages from getting in the way of page 58*0Sstevel@tonic-gate * coalescing, we try to allocate large pages from the system, and carve 59*0Sstevel@tonic-gate * them up to satisfy pmem allocation requests. This will keep the locked 60*0Sstevel@tonic-gate * pages within a constrained area of physical memory, limiting the number 61*0Sstevel@tonic-gate * of large pages that would be pinned by our locked pages. This is, of 62*0Sstevel@tonic-gate * course, another take on the infamous kernel cage, and it has many of the 63*0Sstevel@tonic-gate * downsides of the original cage. It also interferes with system-wide 64*0Sstevel@tonic-gate * resource management decisions, as it maintains its own pool of unused 65*0Sstevel@tonic-gate * pages which can't be easily reclaimed and used during low-memory 66*0Sstevel@tonic-gate * situations. 67*0Sstevel@tonic-gate * 68*0Sstevel@tonic-gate * The right solution is for pmem to register a callback that the VM system 69*0Sstevel@tonic-gate * could call, which would temporarily remove any GART entries for pages 70*0Sstevel@tonic-gate * that were being relocated. This would let us leave the pages unlocked, 71*0Sstevel@tonic-gate * which would remove the need for using large pages, which would simplify 72*0Sstevel@tonic-gate * this code a great deal. Unfortunately, the support for these callbacks 73*0Sstevel@tonic-gate * only exists on some SPARC platforms right now. 74*0Sstevel@tonic-gate * 75*0Sstevel@tonic-gate * Note that this is the *only* reason that large pages are used here. The 76*0Sstevel@tonic-gate * GART can't perform large-page translations, and the code appropriately 77*0Sstevel@tonic-gate * falls back to using small pages if page_create_va_large() fails. 78*0Sstevel@tonic-gate */ 79*0Sstevel@tonic-gate 80*0Sstevel@tonic-gate #define HOLD_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \ 81*0Sstevel@tonic-gate { mutex_enter(&dhp->dh_lock); } 82*0Sstevel@tonic-gate 83*0Sstevel@tonic-gate #define RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \ 84*0Sstevel@tonic-gate { mutex_exit(&dhp->dh_lock); } 85*0Sstevel@tonic-gate 86*0Sstevel@tonic-gate #define FROM_LPG(pp) (pp->p_szc != 0) 87*0Sstevel@tonic-gate #define PFIND(pp) (page_pptonum(pp) & (pmem_pgcnt - 1)) 88*0Sstevel@tonic-gate 89*0Sstevel@tonic-gate /* 90*0Sstevel@tonic-gate * Structs and static variables used for pmem only. 91*0Sstevel@tonic-gate */ 92*0Sstevel@tonic-gate typedef struct pmem_lpg { 93*0Sstevel@tonic-gate page_t *pl_pp; /* start pp */ 94*0Sstevel@tonic-gate ulong_t *pl_bitmap; /* allocation status for each page */ 95*0Sstevel@tonic-gate ushort_t pl_pfree; /* this large page might be fully freed */ 96*0Sstevel@tonic-gate struct pmem_lpg *pl_next; 97*0Sstevel@tonic-gate struct pmem_lpg *pl_prev; 98*0Sstevel@tonic-gate } pmem_lpg_t; 99*0Sstevel@tonic-gate 100*0Sstevel@tonic-gate static size_t pmem_lpgsize; /* the size of one large page */ 101*0Sstevel@tonic-gate static pgcnt_t pmem_pgcnt; /* the number of small pages in a large page */ 102*0Sstevel@tonic-gate static uint_t pmem_lszc; /* page size code of the large page */ 103*0Sstevel@tonic-gate /* The segment to be associated with all the allocated pages. */ 104*0Sstevel@tonic-gate static struct seg pmem_seg; 105*0Sstevel@tonic-gate /* Fully occupied large pages allocated for pmem. */ 106*0Sstevel@tonic-gate static pmem_lpg_t *pmem_occ_lpgs; 107*0Sstevel@tonic-gate /* Memory pool to store residual small pages from large pages. */ 108*0Sstevel@tonic-gate static page_t *pmem_mpool = NULL; 109*0Sstevel@tonic-gate /* Number of small pages reside in pmem_mpool currently. */ 110*0Sstevel@tonic-gate static pgcnt_t pmem_nmpages = 0; 111*0Sstevel@tonic-gate /* To protect pmem_nmpages, pmem_mpool and pmem_occ_lpgs. */ 112*0Sstevel@tonic-gate kmutex_t pmem_mutex; 113*0Sstevel@tonic-gate 114*0Sstevel@tonic-gate static int lpg_isfree(pmem_lpg_t *); 115*0Sstevel@tonic-gate static void pmem_lpg_sub(pmem_lpg_t **, pmem_lpg_t *); 116*0Sstevel@tonic-gate static void pmem_lpg_concat(pmem_lpg_t **, pmem_lpg_t **); 117*0Sstevel@tonic-gate static pmem_lpg_t *pmem_lpg_get(pmem_lpg_t *, page_t *, pmem_lpg_t **); 118*0Sstevel@tonic-gate static pmem_lpg_t *pmem_lpg_alloc(uint_t); 119*0Sstevel@tonic-gate static void pmem_lpg_free(pmem_lpg_t **, pmem_lpg_t *); 120*0Sstevel@tonic-gate static void lpg_free(page_t *spp); 121*0Sstevel@tonic-gate static pgcnt_t mpool_break(page_t **, pgcnt_t); 122*0Sstevel@tonic-gate static void mpool_append(page_t **, pgcnt_t); 123*0Sstevel@tonic-gate static void lpp_break(page_t **, pgcnt_t, pgcnt_t, pmem_lpg_t *); 124*0Sstevel@tonic-gate static void lpp_free(page_t *, pgcnt_t, pmem_lpg_t **); 125*0Sstevel@tonic-gate static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **, 126*0Sstevel@tonic-gate vnode_t *, u_offset_t *, uint_t); 127*0Sstevel@tonic-gate static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *); 128*0Sstevel@tonic-gate static void tlist_out(page_t *, pgcnt_t); 129*0Sstevel@tonic-gate static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t); 130*0Sstevel@tonic-gate static int pmem_lock(pgcnt_t, kproject_t **); 131*0Sstevel@tonic-gate 132*0Sstevel@tonic-gate /* 133*0Sstevel@tonic-gate * Called by driver devmap routine to pass physical memory mapping info to 134*0Sstevel@tonic-gate * seg_dev framework, used only for physical memory allocated from 135*0Sstevel@tonic-gate * devmap_pmem_alloc(). 136*0Sstevel@tonic-gate */ 137*0Sstevel@tonic-gate /* ARGSUSED */ 138*0Sstevel@tonic-gate int 139*0Sstevel@tonic-gate devmap_pmem_setup(devmap_cookie_t dhc, dev_info_t *dip, 140*0Sstevel@tonic-gate struct devmap_callback_ctl *callbackops, devmap_pmem_cookie_t cookie, 141*0Sstevel@tonic-gate offset_t off, size_t len, uint_t maxprot, uint_t flags, 142*0Sstevel@tonic-gate ddi_device_acc_attr_t *accattrp) 143*0Sstevel@tonic-gate { 144*0Sstevel@tonic-gate devmap_handle_t *dhp = (devmap_handle_t *)dhc; 145*0Sstevel@tonic-gate struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie; 146*0Sstevel@tonic-gate 147*0Sstevel@tonic-gate if (pcp == NULL || (off + len) > ptob(pcp->dp_npages)) 148*0Sstevel@tonic-gate return (DDI_FAILURE); 149*0Sstevel@tonic-gate 150*0Sstevel@tonic-gate /* 151*0Sstevel@tonic-gate * First to check if this function has been called for this dhp. 152*0Sstevel@tonic-gate */ 153*0Sstevel@tonic-gate if (dhp->dh_flags & DEVMAP_SETUP_DONE) 154*0Sstevel@tonic-gate return (DDI_FAILURE); 155*0Sstevel@tonic-gate 156*0Sstevel@tonic-gate if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot) 157*0Sstevel@tonic-gate return (DDI_FAILURE); 158*0Sstevel@tonic-gate 159*0Sstevel@tonic-gate if (flags & DEVMAP_MAPPING_INVALID) { 160*0Sstevel@tonic-gate /* 161*0Sstevel@tonic-gate * If DEVMAP_MAPPING_INVALID is specified, we have to grant 162*0Sstevel@tonic-gate * remap permission. 163*0Sstevel@tonic-gate */ 164*0Sstevel@tonic-gate if (!(flags & DEVMAP_ALLOW_REMAP)) 165*0Sstevel@tonic-gate return (DDI_FAILURE); 166*0Sstevel@tonic-gate } else { 167*0Sstevel@tonic-gate dhp->dh_pcookie = (devmap_pmem_cookie_t)pcp; 168*0Sstevel@tonic-gate /* dh_roff is the offset inside the dh_pcookie. */ 169*0Sstevel@tonic-gate dhp->dh_roff = ptob(btop(off)); 170*0Sstevel@tonic-gate } 171*0Sstevel@tonic-gate 172*0Sstevel@tonic-gate /* 173*0Sstevel@tonic-gate * Only "No Cache" and "Write Combining" are supported. If any other 174*0Sstevel@tonic-gate * cache type is specified, override with "No Cache". 175*0Sstevel@tonic-gate */ 176*0Sstevel@tonic-gate if (accattrp->devacc_attr_dataorder == DDI_MERGING_OK_ACC) 177*0Sstevel@tonic-gate dhp->dh_hat_attr = HAT_PLAT_NOCACHE | HAT_MERGING_OK; 178*0Sstevel@tonic-gate else 179*0Sstevel@tonic-gate dhp->dh_hat_attr = HAT_PLAT_NOCACHE | HAT_STRICTORDER; 180*0Sstevel@tonic-gate dhp->dh_cookie = DEVMAP_PMEM_COOKIE; 181*0Sstevel@tonic-gate dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS); 182*0Sstevel@tonic-gate dhp->dh_len = ptob(btopr(len)); 183*0Sstevel@tonic-gate 184*0Sstevel@tonic-gate dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot; 185*0Sstevel@tonic-gate ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot); 186*0Sstevel@tonic-gate 187*0Sstevel@tonic-gate if (callbackops != NULL) { 188*0Sstevel@tonic-gate bcopy(callbackops, &dhp->dh_callbackops, 189*0Sstevel@tonic-gate sizeof (struct devmap_callback_ctl)); 190*0Sstevel@tonic-gate } 191*0Sstevel@tonic-gate 192*0Sstevel@tonic-gate /* 193*0Sstevel@tonic-gate * Initialize dh_lock if we want to do remap. 194*0Sstevel@tonic-gate */ 195*0Sstevel@tonic-gate if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) { 196*0Sstevel@tonic-gate mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL); 197*0Sstevel@tonic-gate dhp->dh_flags |= DEVMAP_LOCK_INITED; 198*0Sstevel@tonic-gate } 199*0Sstevel@tonic-gate 200*0Sstevel@tonic-gate dhp->dh_flags |= DEVMAP_SETUP_DONE; 201*0Sstevel@tonic-gate 202*0Sstevel@tonic-gate return (DDI_SUCCESS); 203*0Sstevel@tonic-gate } 204*0Sstevel@tonic-gate 205*0Sstevel@tonic-gate /* 206*0Sstevel@tonic-gate * Replace existing mapping using a new cookie, mainly gets called when doing 207*0Sstevel@tonic-gate * fork(). Should be called in associated devmap_dup(9E). 208*0Sstevel@tonic-gate */ 209*0Sstevel@tonic-gate /* ARGSUSED */ 210*0Sstevel@tonic-gate int 211*0Sstevel@tonic-gate devmap_pmem_remap(devmap_cookie_t dhc, dev_info_t *dip, 212*0Sstevel@tonic-gate devmap_pmem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot, 213*0Sstevel@tonic-gate uint_t flags, ddi_device_acc_attr_t *accattrp) 214*0Sstevel@tonic-gate { 215*0Sstevel@tonic-gate devmap_handle_t *dhp = (devmap_handle_t *)dhc; 216*0Sstevel@tonic-gate struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie; 217*0Sstevel@tonic-gate 218*0Sstevel@tonic-gate /* 219*0Sstevel@tonic-gate * Reture failure if setup has not been done or no remap permission 220*0Sstevel@tonic-gate * has been granted during the setup. 221*0Sstevel@tonic-gate */ 222*0Sstevel@tonic-gate if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 || 223*0Sstevel@tonic-gate (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0) 224*0Sstevel@tonic-gate return (DDI_FAILURE); 225*0Sstevel@tonic-gate 226*0Sstevel@tonic-gate /* No flags supported for remap yet. */ 227*0Sstevel@tonic-gate if (flags != 0) 228*0Sstevel@tonic-gate return (DDI_FAILURE); 229*0Sstevel@tonic-gate 230*0Sstevel@tonic-gate if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot) 231*0Sstevel@tonic-gate return (DDI_FAILURE); 232*0Sstevel@tonic-gate 233*0Sstevel@tonic-gate if (pcp == NULL || (off + len) > ptob(pcp->dp_npages)) 234*0Sstevel@tonic-gate return (DDI_FAILURE); 235*0Sstevel@tonic-gate 236*0Sstevel@tonic-gate HOLD_DHP_LOCK(dhp); 237*0Sstevel@tonic-gate /* 238*0Sstevel@tonic-gate * Unload the old mapping of pages reloated with this dhp, so next 239*0Sstevel@tonic-gate * fault will setup the new mappings. It is in segdev_faultpage that 240*0Sstevel@tonic-gate * calls hat_devload to establish the mapping. Do this while holding 241*0Sstevel@tonic-gate * the dhp lock so other faults dont reestablish the mappings. 242*0Sstevel@tonic-gate */ 243*0Sstevel@tonic-gate hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr, 244*0Sstevel@tonic-gate dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER); 245*0Sstevel@tonic-gate 246*0Sstevel@tonic-gate /* 247*0Sstevel@tonic-gate * Only "No Cache" and "Write Combining" are supported, if other cache 248*0Sstevel@tonic-gate * type is specified, override with "No Cache". 249*0Sstevel@tonic-gate */ 250*0Sstevel@tonic-gate if (accattrp->devacc_attr_dataorder == DDI_MERGING_OK_ACC) 251*0Sstevel@tonic-gate dhp->dh_hat_attr = HAT_MERGING_OK; 252*0Sstevel@tonic-gate else 253*0Sstevel@tonic-gate dhp->dh_hat_attr = HAT_STRICTORDER; 254*0Sstevel@tonic-gate dhp->dh_pcookie = cookie; 255*0Sstevel@tonic-gate dhp->dh_roff = ptob(btop(off)); 256*0Sstevel@tonic-gate dhp->dh_len = ptob(btopr(len)); 257*0Sstevel@tonic-gate 258*0Sstevel@tonic-gate /* Clear the large page size flag. */ 259*0Sstevel@tonic-gate dhp->dh_flags &= ~DEVMAP_FLAG_LARGE; 260*0Sstevel@tonic-gate 261*0Sstevel@tonic-gate dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot; 262*0Sstevel@tonic-gate ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot); 263*0Sstevel@tonic-gate RELE_DHP_LOCK(dhp); 264*0Sstevel@tonic-gate return (DDI_SUCCESS); 265*0Sstevel@tonic-gate } 266*0Sstevel@tonic-gate 267*0Sstevel@tonic-gate /* 268*0Sstevel@tonic-gate * Directly (i.e., without occupying kernel virtual address space) allocate 269*0Sstevel@tonic-gate * 'npages' physical memory pages for exporting to user land. The allocated 270*0Sstevel@tonic-gate * page_t pointer will be recorded in cookie. 271*0Sstevel@tonic-gate */ 272*0Sstevel@tonic-gate int 273*0Sstevel@tonic-gate devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep) 274*0Sstevel@tonic-gate { 275*0Sstevel@tonic-gate u_offset_t pmem_off = 0; 276*0Sstevel@tonic-gate page_t *pp = NULL; 277*0Sstevel@tonic-gate page_t *lpp = NULL; 278*0Sstevel@tonic-gate page_t *tlist = NULL; 279*0Sstevel@tonic-gate pgcnt_t i = 0; 280*0Sstevel@tonic-gate pgcnt_t rpages = 0; 281*0Sstevel@tonic-gate pgcnt_t lpages = 0; 282*0Sstevel@tonic-gate pgcnt_t tpages = 0; 283*0Sstevel@tonic-gate pgcnt_t npages = btopr(size); 284*0Sstevel@tonic-gate pmem_lpg_t *plp = NULL; 285*0Sstevel@tonic-gate struct devmap_pmem_cookie *pcp; 286*0Sstevel@tonic-gate uint_t reserved = 0; 287*0Sstevel@tonic-gate uint_t locked = 0; 288*0Sstevel@tonic-gate uint_t pflags, kflags; 289*0Sstevel@tonic-gate 290*0Sstevel@tonic-gate *cookiep = NULL; 291*0Sstevel@tonic-gate 292*0Sstevel@tonic-gate /* 293*0Sstevel@tonic-gate * Number larger than this will cause page_create_va() to loop 294*0Sstevel@tonic-gate * infinitely. 295*0Sstevel@tonic-gate */ 296*0Sstevel@tonic-gate if (npages == 0 || npages >= total_pages / 2) 297*0Sstevel@tonic-gate return (DDI_FAILURE); 298*0Sstevel@tonic-gate if ((flags & (PMEM_SLEEP | PMEM_NOSLEEP)) == 0) 299*0Sstevel@tonic-gate return (DDI_FAILURE); 300*0Sstevel@tonic-gate pflags = flags & PMEM_NOSLEEP ? PG_EXCL : PG_WAIT; 301*0Sstevel@tonic-gate kflags = flags & PMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP; 302*0Sstevel@tonic-gate 303*0Sstevel@tonic-gate /* Allocate pmem cookie. */ 304*0Sstevel@tonic-gate if (pmem_cookie_alloc(&pcp, npages, kflags) == DDI_FAILURE) 305*0Sstevel@tonic-gate return (DDI_FAILURE); 306*0Sstevel@tonic-gate pcp->dp_npages = npages; 307*0Sstevel@tonic-gate 308*0Sstevel@tonic-gate /* 309*0Sstevel@tonic-gate * See if the requested memory can be locked. Currently we do resource 310*0Sstevel@tonic-gate * controls on the project levlel only. 311*0Sstevel@tonic-gate */ 312*0Sstevel@tonic-gate if (pmem_lock(npages, &(pcp->dp_projp)) == DDI_FAILURE) 313*0Sstevel@tonic-gate goto alloc_fail; 314*0Sstevel@tonic-gate locked = 1; 315*0Sstevel@tonic-gate 316*0Sstevel@tonic-gate /* 317*0Sstevel@tonic-gate * First, grab as many as possible from pmem_mpool. If pages in 318*0Sstevel@tonic-gate * pmem_mpool are enough for this request, we are done. 319*0Sstevel@tonic-gate */ 320*0Sstevel@tonic-gate mutex_enter(&pmem_mutex); 321*0Sstevel@tonic-gate tpages = mpool_break(&tlist, npages); 322*0Sstevel@tonic-gate /* IOlock and hashin them into the new offset. */ 323*0Sstevel@tonic-gate if (tpages) 324*0Sstevel@tonic-gate tlist_in(tlist, tpages, pcp->dp_vnp, &pmem_off); 325*0Sstevel@tonic-gate mutex_exit(&pmem_mutex); 326*0Sstevel@tonic-gate 327*0Sstevel@tonic-gate if (tpages == npages) 328*0Sstevel@tonic-gate goto done; 329*0Sstevel@tonic-gate 330*0Sstevel@tonic-gate rpages = npages - tpages; 331*0Sstevel@tonic-gate /* Quit now if memory cannot be reserved. */ 332*0Sstevel@tonic-gate if (!page_resv(rpages, kflags)) 333*0Sstevel@tonic-gate goto alloc_fail; 334*0Sstevel@tonic-gate reserved = 1; 335*0Sstevel@tonic-gate 336*0Sstevel@tonic-gate /* Try to allocate large pages first to decrease fragmentation. */ 337*0Sstevel@tonic-gate i = (rpages + (pmem_pgcnt - 1)) / pmem_pgcnt; 338*0Sstevel@tonic-gate if (lpp_create(&lpp, i, &lpages, &plp, pcp->dp_vnp, &pmem_off, 339*0Sstevel@tonic-gate kflags) == DDI_FAILURE) 340*0Sstevel@tonic-gate goto alloc_fail; 341*0Sstevel@tonic-gate ASSERT(lpages == 0 ? lpp == NULL : 1); 342*0Sstevel@tonic-gate 343*0Sstevel@tonic-gate /* 344*0Sstevel@tonic-gate * Pages in large pages is more than the request, put the residual 345*0Sstevel@tonic-gate * pages into pmem_mpool. 346*0Sstevel@tonic-gate */ 347*0Sstevel@tonic-gate if (lpages >= rpages) { 348*0Sstevel@tonic-gate lpp_break(&lpp, lpages, lpages - rpages, plp); 349*0Sstevel@tonic-gate goto done; 350*0Sstevel@tonic-gate } 351*0Sstevel@tonic-gate 352*0Sstevel@tonic-gate /* Allocate small pages if lpp+tlist cannot satisfy the request. */ 353*0Sstevel@tonic-gate i = rpages - lpages; 354*0Sstevel@tonic-gate if ((pp = page_create_va(pcp->dp_vnp, pmem_off, ptob(i), 355*0Sstevel@tonic-gate pflags, &pmem_seg, (caddr_t)pmem_off)) == NULL) 356*0Sstevel@tonic-gate goto alloc_fail; 357*0Sstevel@tonic-gate 358*0Sstevel@tonic-gate done: 359*0Sstevel@tonic-gate page_list_concat(&tlist, &lpp); 360*0Sstevel@tonic-gate page_list_concat(&tlist, &pp); 361*0Sstevel@tonic-gate /* Set those small pages from large pages as allocated. */ 362*0Sstevel@tonic-gate mutex_enter(&pmem_mutex); 363*0Sstevel@tonic-gate pmem_lpg_concat(&pmem_occ_lpgs, &plp); 364*0Sstevel@tonic-gate mutex_exit(&pmem_mutex); 365*0Sstevel@tonic-gate 366*0Sstevel@tonic-gate /* 367*0Sstevel@tonic-gate * Now tlist holds all the pages for this cookie. Record these pages in 368*0Sstevel@tonic-gate * pmem cookie. 369*0Sstevel@tonic-gate */ 370*0Sstevel@tonic-gate for (pp = tlist, i = 0; i < npages; i++) { 371*0Sstevel@tonic-gate pcp->dp_pparray[i] = pp; 372*0Sstevel@tonic-gate page_io_unlock(pp); 373*0Sstevel@tonic-gate pp = pp->p_next; 374*0Sstevel@tonic-gate page_sub(&tlist, pp->p_prev); 375*0Sstevel@tonic-gate } 376*0Sstevel@tonic-gate ASSERT(tlist == NULL); 377*0Sstevel@tonic-gate *cookiep = (devmap_pmem_cookie_t)pcp; 378*0Sstevel@tonic-gate 379*0Sstevel@tonic-gate return (DDI_SUCCESS); 380*0Sstevel@tonic-gate 381*0Sstevel@tonic-gate alloc_fail: 382*0Sstevel@tonic-gate DTRACE_PROBE(pmem__alloc__fail); 383*0Sstevel@tonic-gate /* Free large pages and the associated allocation records. */ 384*0Sstevel@tonic-gate if (lpp) 385*0Sstevel@tonic-gate lpp_free(lpp, lpages / pmem_pgcnt, &plp); 386*0Sstevel@tonic-gate if (reserved == 1) 387*0Sstevel@tonic-gate page_unresv(rpages); 388*0Sstevel@tonic-gate /* Put those pages in tlist back into pmem_mpool. */ 389*0Sstevel@tonic-gate if (tpages != 0) { 390*0Sstevel@tonic-gate mutex_enter(&pmem_mutex); 391*0Sstevel@tonic-gate /* IOunlock, hashout and update the allocation records. */ 392*0Sstevel@tonic-gate tlist_out(tlist, tpages); 393*0Sstevel@tonic-gate mpool_append(&tlist, tpages); 394*0Sstevel@tonic-gate mutex_exit(&pmem_mutex); 395*0Sstevel@tonic-gate } 396*0Sstevel@tonic-gate if (locked == 1) 397*0Sstevel@tonic-gate i_ddi_decr_locked_memory(NULL, NULL, pcp->dp_projp, NULL, 398*0Sstevel@tonic-gate ptob(pcp->dp_npages)); 399*0Sstevel@tonic-gate /* Freeing pmem_cookie. */ 400*0Sstevel@tonic-gate kmem_free(pcp->dp_vnp, sizeof (vnode_t)); 401*0Sstevel@tonic-gate kmem_free(pcp->dp_pparray, npages * sizeof (page_t *)); 402*0Sstevel@tonic-gate kmem_free(pcp, sizeof (struct devmap_pmem_cookie)); 403*0Sstevel@tonic-gate return (DDI_FAILURE); 404*0Sstevel@tonic-gate } 405*0Sstevel@tonic-gate 406*0Sstevel@tonic-gate /* 407*0Sstevel@tonic-gate * Free all small pages inside cookie, and return pages from large pages into 408*0Sstevel@tonic-gate * mpool, if all the pages from one large page is in mpool, free it as a whole. 409*0Sstevel@tonic-gate */ 410*0Sstevel@tonic-gate void 411*0Sstevel@tonic-gate devmap_pmem_free(devmap_pmem_cookie_t cookie) 412*0Sstevel@tonic-gate { 413*0Sstevel@tonic-gate struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie; 414*0Sstevel@tonic-gate pgcnt_t i; 415*0Sstevel@tonic-gate pgcnt_t tpages = 0; 416*0Sstevel@tonic-gate page_t *pp; 417*0Sstevel@tonic-gate pmem_lpg_t *pl1, *plp; 418*0Sstevel@tonic-gate pmem_lpg_t *pf_lpgs = NULL; 419*0Sstevel@tonic-gate uint_t npls = 0; 420*0Sstevel@tonic-gate pmem_lpg_t *last_pl = NULL; 421*0Sstevel@tonic-gate pmem_lpg_t *plast_pl = NULL; 422*0Sstevel@tonic-gate 423*0Sstevel@tonic-gate ASSERT(pcp); 424*0Sstevel@tonic-gate mutex_enter(&pmem_mutex); 425*0Sstevel@tonic-gate /* Free small pages and return them to memory pool. */ 426*0Sstevel@tonic-gate for (i = pcp->dp_npages; i > 0; i--) { 427*0Sstevel@tonic-gate pp = pcp->dp_pparray[i - 1]; 428*0Sstevel@tonic-gate page_hashout(pp, NULL); 429*0Sstevel@tonic-gate /* 430*0Sstevel@tonic-gate * Remove the mapping of this single page, this mapping is 431*0Sstevel@tonic-gate * created using hat_devload() in segdev_faultpage(). 432*0Sstevel@tonic-gate */ 433*0Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 434*0Sstevel@tonic-gate if (!FROM_LPG(pp)) { 435*0Sstevel@tonic-gate /* Normal small page. */ 436*0Sstevel@tonic-gate page_free(pp, 1); 437*0Sstevel@tonic-gate page_unresv(1); 438*0Sstevel@tonic-gate } else { 439*0Sstevel@tonic-gate /* Small page from large pages. */ 440*0Sstevel@tonic-gate plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl); 441*0Sstevel@tonic-gate if (plp && !(plp->pl_pfree)) { 442*0Sstevel@tonic-gate /* 443*0Sstevel@tonic-gate * Move this record to pf_lpgs list, this large 444*0Sstevel@tonic-gate * page may be able to be freed as a whole. 445*0Sstevel@tonic-gate */ 446*0Sstevel@tonic-gate pmem_lpg_sub(&pmem_occ_lpgs, plp); 447*0Sstevel@tonic-gate pmem_lpg_concat(&pf_lpgs, &plp); 448*0Sstevel@tonic-gate plp->pl_pfree = 1; 449*0Sstevel@tonic-gate npls++; 450*0Sstevel@tonic-gate last_pl = NULL; 451*0Sstevel@tonic-gate } else { 452*0Sstevel@tonic-gate /* Search in pf_lpgs list. */ 453*0Sstevel@tonic-gate plp = pmem_lpg_get(pf_lpgs, pp, &plast_pl); 454*0Sstevel@tonic-gate } 455*0Sstevel@tonic-gate ASSERT(plp); 456*0Sstevel@tonic-gate /* Mark this page as free. */ 457*0Sstevel@tonic-gate BT_SET(plp->pl_bitmap, PFIND(pp)); 458*0Sstevel@tonic-gate /* Record this page in pmem_mpool. */ 459*0Sstevel@tonic-gate mpool_append(&pp, 1); 460*0Sstevel@tonic-gate } 461*0Sstevel@tonic-gate } 462*0Sstevel@tonic-gate 463*0Sstevel@tonic-gate /* 464*0Sstevel@tonic-gate * Find out the large pages whose pages have been freed, remove them 465*0Sstevel@tonic-gate * from plp list, free them and the associated pmem_lpg struct. 466*0Sstevel@tonic-gate */ 467*0Sstevel@tonic-gate for (plp = pf_lpgs; npls != 0; npls--) { 468*0Sstevel@tonic-gate pl1 = plp; 469*0Sstevel@tonic-gate plp = plp->pl_next; 470*0Sstevel@tonic-gate if (lpg_isfree(pl1)) { 471*0Sstevel@tonic-gate /* 472*0Sstevel@tonic-gate * Get one free large page. Find all pages in this 473*0Sstevel@tonic-gate * large page and remove them from pmem_mpool. 474*0Sstevel@tonic-gate */ 475*0Sstevel@tonic-gate lpg_free(pl1->pl_pp); 476*0Sstevel@tonic-gate /* Remove associated allocation records. */ 477*0Sstevel@tonic-gate pmem_lpg_sub(&pf_lpgs, pl1); 478*0Sstevel@tonic-gate pmem_lpg_free(&pf_lpgs, pl1); 479*0Sstevel@tonic-gate tpages -= pmem_pgcnt; 480*0Sstevel@tonic-gate } else 481*0Sstevel@tonic-gate pl1->pl_pfree = 0; 482*0Sstevel@tonic-gate } 483*0Sstevel@tonic-gate /* Update allocation records accordingly. */ 484*0Sstevel@tonic-gate pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs); 485*0Sstevel@tonic-gate mutex_exit(&pmem_mutex); 486*0Sstevel@tonic-gate 487*0Sstevel@tonic-gate i_ddi_decr_locked_memory(NULL, NULL, (kproject_t *)pcp->dp_projp, NULL, 488*0Sstevel@tonic-gate ptob(pcp->dp_npages)); 489*0Sstevel@tonic-gate kmem_free(pcp->dp_vnp, sizeof (vnode_t)); 490*0Sstevel@tonic-gate kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *)); 491*0Sstevel@tonic-gate kmem_free(pcp, sizeof (struct devmap_pmem_cookie)); 492*0Sstevel@tonic-gate } 493*0Sstevel@tonic-gate 494*0Sstevel@tonic-gate /* 495*0Sstevel@tonic-gate * To extract page frame number from specified range in a cookie. 496*0Sstevel@tonic-gate */ 497*0Sstevel@tonic-gate int 498*0Sstevel@tonic-gate devmap_pmem_getpfns(devmap_pmem_cookie_t cookie, uint_t start, pgcnt_t npages, 499*0Sstevel@tonic-gate pfn_t *pfnarray) 500*0Sstevel@tonic-gate { 501*0Sstevel@tonic-gate struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie; 502*0Sstevel@tonic-gate pgcnt_t i; 503*0Sstevel@tonic-gate 504*0Sstevel@tonic-gate if (pcp == NULL || start + npages > pcp->dp_npages) 505*0Sstevel@tonic-gate return (DDI_FAILURE); 506*0Sstevel@tonic-gate 507*0Sstevel@tonic-gate for (i = start; i < start + npages; i++) 508*0Sstevel@tonic-gate pfnarray[i - start] = pcp->dp_pparray[i]->p_pagenum; 509*0Sstevel@tonic-gate return (DDI_SUCCESS); 510*0Sstevel@tonic-gate } 511*0Sstevel@tonic-gate 512*0Sstevel@tonic-gate void 513*0Sstevel@tonic-gate pmem_init() 514*0Sstevel@tonic-gate { 515*0Sstevel@tonic-gate mutex_init(&pmem_mutex, NULL, MUTEX_DEFAULT, NULL); 516*0Sstevel@tonic-gate pmem_lszc = MIN(1, page_num_pagesizes() - 1); 517*0Sstevel@tonic-gate pmem_lpgsize = page_get_pagesize(pmem_lszc); 518*0Sstevel@tonic-gate pmem_pgcnt = pmem_lpgsize >> PAGESHIFT; 519*0Sstevel@tonic-gate bzero(&pmem_seg, sizeof (struct seg)); 520*0Sstevel@tonic-gate pmem_seg.s_as = &kas; 521*0Sstevel@tonic-gate } 522*0Sstevel@tonic-gate 523*0Sstevel@tonic-gate /* Allocate kernel memory for one pmem cookie with n pages. */ 524*0Sstevel@tonic-gate static int 525*0Sstevel@tonic-gate pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags) 526*0Sstevel@tonic-gate { 527*0Sstevel@tonic-gate struct devmap_pmem_cookie *pcp; 528*0Sstevel@tonic-gate 529*0Sstevel@tonic-gate if ((*pcpp = kmem_zalloc(sizeof (struct devmap_pmem_cookie), 530*0Sstevel@tonic-gate kflags)) == NULL) 531*0Sstevel@tonic-gate return (DDI_FAILURE); 532*0Sstevel@tonic-gate pcp = *pcpp; 533*0Sstevel@tonic-gate if ((pcp->dp_vnp = 534*0Sstevel@tonic-gate kmem_zalloc(sizeof (vnode_t), kflags)) == NULL) { 535*0Sstevel@tonic-gate kmem_free(pcp, sizeof (struct devmap_pmem_cookie)); 536*0Sstevel@tonic-gate return (DDI_FAILURE); 537*0Sstevel@tonic-gate } 538*0Sstevel@tonic-gate if ((pcp->dp_pparray = 539*0Sstevel@tonic-gate kmem_zalloc(n * sizeof (page_t *), kflags)) == NULL) { 540*0Sstevel@tonic-gate kmem_free(pcp->dp_vnp, sizeof (vnode_t)); 541*0Sstevel@tonic-gate kmem_free(pcp, sizeof (struct devmap_pmem_cookie)); 542*0Sstevel@tonic-gate return (DDI_FAILURE); 543*0Sstevel@tonic-gate } 544*0Sstevel@tonic-gate return (DDI_SUCCESS); 545*0Sstevel@tonic-gate } 546*0Sstevel@tonic-gate 547*0Sstevel@tonic-gate /* Try to lock down n pages resource for current project. */ 548*0Sstevel@tonic-gate static int 549*0Sstevel@tonic-gate pmem_lock(pgcnt_t n, kproject_t **prjpp) 550*0Sstevel@tonic-gate { 551*0Sstevel@tonic-gate mutex_enter(&curproc->p_lock); 552*0Sstevel@tonic-gate if (i_ddi_incr_locked_memory(curproc, NULL, NULL, NULL, 553*0Sstevel@tonic-gate ptob(n)) != 0) { 554*0Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 555*0Sstevel@tonic-gate return (DDI_FAILURE); 556*0Sstevel@tonic-gate } 557*0Sstevel@tonic-gate /* Store this project in cookie for later lock/unlock. */ 558*0Sstevel@tonic-gate *prjpp = curproc->p_task->tk_proj; 559*0Sstevel@tonic-gate mutex_exit(&curproc->p_lock); 560*0Sstevel@tonic-gate return (DDI_SUCCESS); 561*0Sstevel@tonic-gate } 562*0Sstevel@tonic-gate 563*0Sstevel@tonic-gate /* To check if all the pages in a large page are freed. */ 564*0Sstevel@tonic-gate static int 565*0Sstevel@tonic-gate lpg_isfree(pmem_lpg_t *plp) 566*0Sstevel@tonic-gate { 567*0Sstevel@tonic-gate uint_t i; 568*0Sstevel@tonic-gate 569*0Sstevel@tonic-gate for (i = 0; i < BT_BITOUL(pmem_pgcnt); i++) 570*0Sstevel@tonic-gate if (plp->pl_bitmap[i] != BT_ULMAXMASK) 571*0Sstevel@tonic-gate return (0); 572*0Sstevel@tonic-gate /* All 1 means all pages are freed. */ 573*0Sstevel@tonic-gate return (1); 574*0Sstevel@tonic-gate } 575*0Sstevel@tonic-gate 576*0Sstevel@tonic-gate /* 577*0Sstevel@tonic-gate * Using pp to get the associated large page allocation record, searching in 578*0Sstevel@tonic-gate * the splp linked list with *last as the heuristic pointer. Return NULL if 579*0Sstevel@tonic-gate * not found. 580*0Sstevel@tonic-gate */ 581*0Sstevel@tonic-gate static pmem_lpg_t * 582*0Sstevel@tonic-gate pmem_lpg_get(pmem_lpg_t *splp, page_t *pp, pmem_lpg_t **last) 583*0Sstevel@tonic-gate { 584*0Sstevel@tonic-gate pmem_lpg_t *plp; 585*0Sstevel@tonic-gate pgcnt_t root_pfn; 586*0Sstevel@tonic-gate 587*0Sstevel@tonic-gate ASSERT(pp); 588*0Sstevel@tonic-gate if (splp == NULL) 589*0Sstevel@tonic-gate return (NULL); 590*0Sstevel@tonic-gate root_pfn = page_pptonum(pp) & ~(pmem_pgcnt - 1); 591*0Sstevel@tonic-gate 592*0Sstevel@tonic-gate /* Try last winner first. */ 593*0Sstevel@tonic-gate if (*last && root_pfn == page_pptonum((*last)->pl_pp)) 594*0Sstevel@tonic-gate goto pl_found; 595*0Sstevel@tonic-gate 596*0Sstevel@tonic-gate /* Else search the whole pmem_lpg list. */ 597*0Sstevel@tonic-gate for (plp = splp; root_pfn != page_pptonum(plp->pl_pp); ) { 598*0Sstevel@tonic-gate plp = plp->pl_next; 599*0Sstevel@tonic-gate if (plp == splp) { 600*0Sstevel@tonic-gate plp = NULL; 601*0Sstevel@tonic-gate break; 602*0Sstevel@tonic-gate } 603*0Sstevel@tonic-gate ASSERT(plp->pl_pp); 604*0Sstevel@tonic-gate } 605*0Sstevel@tonic-gate 606*0Sstevel@tonic-gate *last = plp; 607*0Sstevel@tonic-gate 608*0Sstevel@tonic-gate pl_found: 609*0Sstevel@tonic-gate return (*last); 610*0Sstevel@tonic-gate } 611*0Sstevel@tonic-gate 612*0Sstevel@tonic-gate /* 613*0Sstevel@tonic-gate * Remove one pmem_lpg plp from the oplpp list. 614*0Sstevel@tonic-gate */ 615*0Sstevel@tonic-gate static void 616*0Sstevel@tonic-gate pmem_lpg_sub(pmem_lpg_t **oplpp, pmem_lpg_t *plp) 617*0Sstevel@tonic-gate { 618*0Sstevel@tonic-gate if (*oplpp == plp) 619*0Sstevel@tonic-gate *oplpp = plp->pl_next; /* go to next pmem_lpg */ 620*0Sstevel@tonic-gate 621*0Sstevel@tonic-gate if (*oplpp == plp) 622*0Sstevel@tonic-gate *oplpp = NULL; /* pmem_lpg list is gone */ 623*0Sstevel@tonic-gate else { 624*0Sstevel@tonic-gate plp->pl_prev->pl_next = plp->pl_next; 625*0Sstevel@tonic-gate plp->pl_next->pl_prev = plp->pl_prev; 626*0Sstevel@tonic-gate } 627*0Sstevel@tonic-gate plp->pl_prev = plp->pl_next = plp; /* make plp a list of one */ 628*0Sstevel@tonic-gate } 629*0Sstevel@tonic-gate 630*0Sstevel@tonic-gate /* 631*0Sstevel@tonic-gate * Concatenate page list nplpp onto the end of list plpp. 632*0Sstevel@tonic-gate */ 633*0Sstevel@tonic-gate static void 634*0Sstevel@tonic-gate pmem_lpg_concat(pmem_lpg_t **plpp, pmem_lpg_t **nplpp) 635*0Sstevel@tonic-gate { 636*0Sstevel@tonic-gate pmem_lpg_t *s1p, *s2p, *e1p, *e2p; 637*0Sstevel@tonic-gate 638*0Sstevel@tonic-gate if (*nplpp == NULL) { 639*0Sstevel@tonic-gate return; 640*0Sstevel@tonic-gate } 641*0Sstevel@tonic-gate if (*plpp == NULL) { 642*0Sstevel@tonic-gate *plpp = *nplpp; 643*0Sstevel@tonic-gate return; 644*0Sstevel@tonic-gate } 645*0Sstevel@tonic-gate s1p = *plpp; 646*0Sstevel@tonic-gate e1p = s1p->pl_prev; 647*0Sstevel@tonic-gate s2p = *nplpp; 648*0Sstevel@tonic-gate e2p = s2p->pl_prev; 649*0Sstevel@tonic-gate s1p->pl_prev = e2p; 650*0Sstevel@tonic-gate e2p->pl_next = s1p; 651*0Sstevel@tonic-gate e1p->pl_next = s2p; 652*0Sstevel@tonic-gate s2p->pl_prev = e1p; 653*0Sstevel@tonic-gate } 654*0Sstevel@tonic-gate 655*0Sstevel@tonic-gate /* 656*0Sstevel@tonic-gate * Allocate and initialize the allocation record of one large page, the init 657*0Sstevel@tonic-gate * value is 'allocated'. 658*0Sstevel@tonic-gate */ 659*0Sstevel@tonic-gate static pmem_lpg_t * 660*0Sstevel@tonic-gate pmem_lpg_alloc(uint_t kflags) 661*0Sstevel@tonic-gate { 662*0Sstevel@tonic-gate pmem_lpg_t *plp; 663*0Sstevel@tonic-gate 664*0Sstevel@tonic-gate ASSERT(pmem_pgcnt % BT_NBIPUL == 0); 665*0Sstevel@tonic-gate plp = kmem_zalloc(sizeof (pmem_lpg_t), kflags); 666*0Sstevel@tonic-gate if (plp == NULL) 667*0Sstevel@tonic-gate return (NULL); 668*0Sstevel@tonic-gate plp->pl_bitmap = kmem_zalloc(BT_SIZEOFMAP(pmem_pgcnt), kflags); 669*0Sstevel@tonic-gate if (plp->pl_bitmap == NULL) { 670*0Sstevel@tonic-gate kmem_free(plp, sizeof (*plp)); 671*0Sstevel@tonic-gate return (NULL); 672*0Sstevel@tonic-gate } 673*0Sstevel@tonic-gate plp->pl_next = plp->pl_prev = plp; 674*0Sstevel@tonic-gate return (plp); 675*0Sstevel@tonic-gate } 676*0Sstevel@tonic-gate 677*0Sstevel@tonic-gate /* Free one allocation record pointed by oplp. */ 678*0Sstevel@tonic-gate static void 679*0Sstevel@tonic-gate pmem_lpg_free(pmem_lpg_t **headp, pmem_lpg_t *plp) 680*0Sstevel@tonic-gate { 681*0Sstevel@tonic-gate if (*headp == plp) 682*0Sstevel@tonic-gate *headp = plp->pl_next; /* go to next pmem_lpg_t */ 683*0Sstevel@tonic-gate 684*0Sstevel@tonic-gate if (*headp == plp) 685*0Sstevel@tonic-gate *headp = NULL; /* this list is gone */ 686*0Sstevel@tonic-gate else { 687*0Sstevel@tonic-gate plp->pl_prev->pl_next = plp->pl_next; 688*0Sstevel@tonic-gate plp->pl_next->pl_prev = plp->pl_prev; 689*0Sstevel@tonic-gate } 690*0Sstevel@tonic-gate kmem_free(plp->pl_bitmap, BT_SIZEOFMAP(pmem_pgcnt)); 691*0Sstevel@tonic-gate kmem_free(plp, sizeof (*plp)); 692*0Sstevel@tonic-gate } 693*0Sstevel@tonic-gate 694*0Sstevel@tonic-gate /* Free one large page headed by spp from pmem_mpool. */ 695*0Sstevel@tonic-gate static void 696*0Sstevel@tonic-gate lpg_free(page_t *spp) 697*0Sstevel@tonic-gate { 698*0Sstevel@tonic-gate page_t *pp1 = spp; 699*0Sstevel@tonic-gate uint_t i; 700*0Sstevel@tonic-gate 701*0Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 702*0Sstevel@tonic-gate for (i = 0; i < pmem_pgcnt; i++) { 703*0Sstevel@tonic-gate /* Break pp1 from pmem_mpool. */ 704*0Sstevel@tonic-gate page_sub(&pmem_mpool, pp1); 705*0Sstevel@tonic-gate pp1++; 706*0Sstevel@tonic-gate } 707*0Sstevel@tonic-gate /* Free pages in this large page. */ 708*0Sstevel@tonic-gate page_free_pages(spp); 709*0Sstevel@tonic-gate page_unresv(pmem_pgcnt); 710*0Sstevel@tonic-gate pmem_nmpages -= pmem_pgcnt; 711*0Sstevel@tonic-gate ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool)); 712*0Sstevel@tonic-gate } 713*0Sstevel@tonic-gate 714*0Sstevel@tonic-gate /* Put n pages in *ppp list back into pmem_mpool. */ 715*0Sstevel@tonic-gate static void 716*0Sstevel@tonic-gate mpool_append(page_t **ppp, pgcnt_t n) 717*0Sstevel@tonic-gate { 718*0Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 719*0Sstevel@tonic-gate /* Put back pages. */ 720*0Sstevel@tonic-gate page_list_concat(&pmem_mpool, ppp); 721*0Sstevel@tonic-gate pmem_nmpages += n; 722*0Sstevel@tonic-gate ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool)); 723*0Sstevel@tonic-gate } 724*0Sstevel@tonic-gate 725*0Sstevel@tonic-gate /* 726*0Sstevel@tonic-gate * Try to grab MIN(pmem_nmpages, n) pages from pmem_mpool, put them into *ppp 727*0Sstevel@tonic-gate * list, and return the number of grabbed pages. 728*0Sstevel@tonic-gate */ 729*0Sstevel@tonic-gate static pgcnt_t 730*0Sstevel@tonic-gate mpool_break(page_t **ppp, pgcnt_t n) 731*0Sstevel@tonic-gate { 732*0Sstevel@tonic-gate pgcnt_t i; 733*0Sstevel@tonic-gate 734*0Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 735*0Sstevel@tonic-gate /* Grab the pages. */ 736*0Sstevel@tonic-gate i = MIN(pmem_nmpages, n); 737*0Sstevel@tonic-gate *ppp = pmem_mpool; 738*0Sstevel@tonic-gate page_list_break(ppp, &pmem_mpool, i); 739*0Sstevel@tonic-gate pmem_nmpages -= i; 740*0Sstevel@tonic-gate ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool)); 741*0Sstevel@tonic-gate return (i); 742*0Sstevel@tonic-gate } 743*0Sstevel@tonic-gate 744*0Sstevel@tonic-gate /* 745*0Sstevel@tonic-gate * Create n large pages, lpages and plpp contains the number of small pages and 746*0Sstevel@tonic-gate * allocation records list respectively. 747*0Sstevel@tonic-gate */ 748*0Sstevel@tonic-gate static int 749*0Sstevel@tonic-gate lpp_create(page_t **lppp, pgcnt_t n, pgcnt_t *lpages, pmem_lpg_t **plpp, 750*0Sstevel@tonic-gate vnode_t *vnp, u_offset_t *offp, uint_t kflags) 751*0Sstevel@tonic-gate { 752*0Sstevel@tonic-gate pgcnt_t i; 753*0Sstevel@tonic-gate pmem_lpg_t *plp; 754*0Sstevel@tonic-gate page_t *pp; 755*0Sstevel@tonic-gate 756*0Sstevel@tonic-gate for (i = 0, *lpages = 0; i < n; i++) { 757*0Sstevel@tonic-gate /* Allocte one large page each time. */ 758*0Sstevel@tonic-gate pp = page_create_va_large(vnp, *offp, pmem_lpgsize, 759*0Sstevel@tonic-gate PG_EXCL, &pmem_seg, (caddr_t)*offp, NULL); 760*0Sstevel@tonic-gate if (pp == NULL) 761*0Sstevel@tonic-gate break; 762*0Sstevel@tonic-gate *offp += pmem_lpgsize; 763*0Sstevel@tonic-gate page_list_concat(lppp, &pp); 764*0Sstevel@tonic-gate *lpages += pmem_pgcnt; 765*0Sstevel@tonic-gate /* Add one allocation record for this large page. */ 766*0Sstevel@tonic-gate if ((plp = pmem_lpg_alloc(kflags)) == NULL) 767*0Sstevel@tonic-gate return (DDI_FAILURE); 768*0Sstevel@tonic-gate plp->pl_pp = pp; 769*0Sstevel@tonic-gate pmem_lpg_concat(plpp, &plp); 770*0Sstevel@tonic-gate } 771*0Sstevel@tonic-gate return (DDI_SUCCESS); 772*0Sstevel@tonic-gate } 773*0Sstevel@tonic-gate 774*0Sstevel@tonic-gate /* 775*0Sstevel@tonic-gate * Break the last r small pages from the large page list *lppp (with totally n 776*0Sstevel@tonic-gate * small pages) and put them into pmem_mpool. 777*0Sstevel@tonic-gate */ 778*0Sstevel@tonic-gate static void 779*0Sstevel@tonic-gate lpp_break(page_t **lppp, pgcnt_t n, pgcnt_t r, pmem_lpg_t *oplp) 780*0Sstevel@tonic-gate { 781*0Sstevel@tonic-gate page_t *pp, *pp1; 782*0Sstevel@tonic-gate pgcnt_t i; 783*0Sstevel@tonic-gate pmem_lpg_t *plp; 784*0Sstevel@tonic-gate 785*0Sstevel@tonic-gate if (r == 0) 786*0Sstevel@tonic-gate return; 787*0Sstevel@tonic-gate ASSERT(*lppp != NULL && r < pmem_pgcnt); 788*0Sstevel@tonic-gate page_list_break(lppp, &pp, n - r); 789*0Sstevel@tonic-gate 790*0Sstevel@tonic-gate /* The residual should reside in the last large page. */ 791*0Sstevel@tonic-gate plp = oplp->pl_prev; 792*0Sstevel@tonic-gate /* IOunlock and hashout the residual pages. */ 793*0Sstevel@tonic-gate for (pp1 = pp, i = 0; i < r; i++) { 794*0Sstevel@tonic-gate page_io_unlock(pp1); 795*0Sstevel@tonic-gate page_hashout(pp1, NULL); 796*0Sstevel@tonic-gate /* Mark this page as free. */ 797*0Sstevel@tonic-gate BT_SET(plp->pl_bitmap, PFIND(pp1)); 798*0Sstevel@tonic-gate pp1 = pp1->p_next; 799*0Sstevel@tonic-gate } 800*0Sstevel@tonic-gate ASSERT(pp1 == pp); 801*0Sstevel@tonic-gate /* Put these residual pages into memory pool. */ 802*0Sstevel@tonic-gate mutex_enter(&pmem_mutex); 803*0Sstevel@tonic-gate mpool_append(&pp, r); 804*0Sstevel@tonic-gate mutex_exit(&pmem_mutex); 805*0Sstevel@tonic-gate } 806*0Sstevel@tonic-gate 807*0Sstevel@tonic-gate /* Freeing large pages in lpp and the associated allocation records in plp. */ 808*0Sstevel@tonic-gate static void 809*0Sstevel@tonic-gate lpp_free(page_t *lpp, pgcnt_t lpgs, pmem_lpg_t **plpp) 810*0Sstevel@tonic-gate { 811*0Sstevel@tonic-gate pgcnt_t i, j; 812*0Sstevel@tonic-gate page_t *pp = lpp, *pp1; 813*0Sstevel@tonic-gate pmem_lpg_t *plp1, *plp2; 814*0Sstevel@tonic-gate 815*0Sstevel@tonic-gate for (i = 0; i < lpgs; i++) { 816*0Sstevel@tonic-gate for (j = 0; j < pmem_pgcnt; j++) { 817*0Sstevel@tonic-gate /* IO unlock and hashout this small page. */ 818*0Sstevel@tonic-gate page_io_unlock(pp); 819*0Sstevel@tonic-gate page_hashout(pp, NULL); 820*0Sstevel@tonic-gate pp1 = pp->p_next; 821*0Sstevel@tonic-gate pp->p_prev = pp->p_next = pp; 822*0Sstevel@tonic-gate pp = pp1; 823*0Sstevel@tonic-gate } 824*0Sstevel@tonic-gate /* Free one large page at one time. */ 825*0Sstevel@tonic-gate page_free_pages(lpp); 826*0Sstevel@tonic-gate lpp = pp; 827*0Sstevel@tonic-gate } 828*0Sstevel@tonic-gate /* Free associate pmem large page allocation records. */ 829*0Sstevel@tonic-gate for (plp1 = *plpp; *plpp; plp1 = plp2) { 830*0Sstevel@tonic-gate plp2 = plp1->pl_next; 831*0Sstevel@tonic-gate pmem_lpg_free(plpp, plp1); 832*0Sstevel@tonic-gate } 833*0Sstevel@tonic-gate } 834*0Sstevel@tonic-gate 835*0Sstevel@tonic-gate /* 836*0Sstevel@tonic-gate * IOlock and hashin all pages in tlist, associate them with vnode *pvnp 837*0Sstevel@tonic-gate * and offset starting with *poffp. Update allocation records accordingly at 838*0Sstevel@tonic-gate * the same time. 839*0Sstevel@tonic-gate */ 840*0Sstevel@tonic-gate static void 841*0Sstevel@tonic-gate tlist_in(page_t *tlist, pgcnt_t tpages, vnode_t *pvnp, u_offset_t *poffp) 842*0Sstevel@tonic-gate { 843*0Sstevel@tonic-gate page_t *pp; 844*0Sstevel@tonic-gate pgcnt_t i = 0; 845*0Sstevel@tonic-gate pmem_lpg_t *plp, *last_pl = NULL; 846*0Sstevel@tonic-gate 847*0Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 848*0Sstevel@tonic-gate for (pp = tlist; i < tpages; i++) { 849*0Sstevel@tonic-gate ASSERT(FROM_LPG(pp)); 850*0Sstevel@tonic-gate page_io_lock(pp); 851*0Sstevel@tonic-gate (void) page_hashin(pp, pvnp, *poffp, NULL); 852*0Sstevel@tonic-gate plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl); 853*0Sstevel@tonic-gate /* Mark this page as allocated. */ 854*0Sstevel@tonic-gate BT_CLEAR(plp->pl_bitmap, PFIND(pp)); 855*0Sstevel@tonic-gate *poffp += PAGESIZE; 856*0Sstevel@tonic-gate pp = pp->p_next; 857*0Sstevel@tonic-gate } 858*0Sstevel@tonic-gate ASSERT(pp == tlist); 859*0Sstevel@tonic-gate } 860*0Sstevel@tonic-gate 861*0Sstevel@tonic-gate /* 862*0Sstevel@tonic-gate * IOunlock and hashout all pages in tlist, update allocation records 863*0Sstevel@tonic-gate * accordingly at the same time. 864*0Sstevel@tonic-gate */ 865*0Sstevel@tonic-gate static void 866*0Sstevel@tonic-gate tlist_out(page_t *tlist, pgcnt_t tpages) 867*0Sstevel@tonic-gate { 868*0Sstevel@tonic-gate page_t *pp; 869*0Sstevel@tonic-gate pgcnt_t i = 0; 870*0Sstevel@tonic-gate pmem_lpg_t *plp, *last_pl = NULL; 871*0Sstevel@tonic-gate 872*0Sstevel@tonic-gate ASSERT(MUTEX_HELD(&pmem_mutex)); 873*0Sstevel@tonic-gate for (pp = tlist; i < tpages; i++) { 874*0Sstevel@tonic-gate ASSERT(FROM_LPG(pp)); 875*0Sstevel@tonic-gate page_io_unlock(pp); 876*0Sstevel@tonic-gate page_hashout(pp, NULL); 877*0Sstevel@tonic-gate plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl); 878*0Sstevel@tonic-gate /* Mark this page as free. */ 879*0Sstevel@tonic-gate BT_SET(plp->pl_bitmap, PFIND(pp)); 880*0Sstevel@tonic-gate pp = pp->p_next; 881*0Sstevel@tonic-gate } 882*0Sstevel@tonic-gate ASSERT(pp == tlist); 883*0Sstevel@tonic-gate } 884