/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * PMEM - Direct mapping of physical memory pages to userland processes
 *
 * Provides functions for allocating physical memory pages and exporting
 * them to userland directly, i.e., without occupying kernel virtual
 * address space.
 */
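
/*
 * Illustrative call sequence (a sketch, not taken verbatim from any
 * driver): a client driver typically allocates pages with
 * devmap_pmem_alloc(), hands the cookie to the seg_dev framework from its
 * devmap(9E) entry point via devmap_pmem_setup(), translates frame numbers
 * with devmap_pmem_getpfns() to program its hardware, and finally releases
 * everything with devmap_pmem_free(). NPAGES, my_callback_ctl and
 * my_acc_attr below are hypothetical names:
 *
 *	devmap_pmem_cookie_t pcookie;
 *	pfn_t pfns[NPAGES];
 *
 *	if (devmap_pmem_alloc(ptob(NPAGES), PMEM_SLEEP, &pcookie) !=
 *	    DDI_SUCCESS)
 *		return (DDI_FAILURE);
 *	(void) devmap_pmem_setup(dhc, dip, &my_callback_ctl, pcookie, 0,
 *	    ptob(NPAGES), PROT_ALL,
 *	    DEVMAP_ALLOW_REMAP | IOMEM_DATA_UC_WR_COMBINE, &my_acc_attr);
 *	(void) devmap_pmem_getpfns(pcookie, 0, NPAGES, pfns);
 *	...
 *	devmap_pmem_free(pcookie);
 */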

#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/sunddi.h>
#include <sys/ddidevmap.h>
#include <sys/vnode.h>
#include <sys/sysmacros.h>
#include <vm/seg_dev.h>
#include <sys/pmem.h>
#include <vm/hat_i86.h>
#include <sys/task.h>
#include <sys/sdt.h>

/*
 * The routines in this file allocate memory which will be accessed through
 * the AGP GART hardware.  The GART is programmed with the PFNs for this
 * memory, and the only mechanism for removing these entries is by an
 * explicit process operation (ioctl/close of the driver, or process exit).
 * As such, the pages need to remain locked to ensure that they won't be
 * relocated or paged out.
 *
 * To prevent these locked pages from getting in the way of page
 * coalescing, we try to allocate large pages from the system, and carve
 * them up to satisfy pmem allocation requests.  This will keep the locked
 * pages within a constrained area of physical memory, limiting the number
 * of large pages that would be pinned by our locked pages.  This is, of
 * course, another take on the infamous kernel cage, and it has many of the
 * downsides of the original cage.  It also interferes with system-wide
 * resource management decisions, as it maintains its own pool of unused
 * pages which can't be easily reclaimed and used during low-memory
 * situations.
 *
 * The right solution is for pmem to register a callback that the VM system
 * could call, which would temporarily remove any GART entries for pages
 * that were being relocated.  This would let us leave the pages unlocked,
 * which would remove the need for using large pages, which would simplify
 * this code a great deal.  Unfortunately, the support for these callbacks
 * only exists on some SPARC platforms right now.
 *
 * Note that this is the *only* reason that large pages are used here.  The
 * GART can't perform large-page translations, and the code appropriately
 * falls back to using small pages if page_create_va_large() fails.
 */

#define	HOLD_DHP_LOCK(dhp)	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
				{ mutex_enter(&dhp->dh_lock); }

#define	RELE_DHP_LOCK(dhp)	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
				{ mutex_exit(&dhp->dh_lock); }

#define	FROM_LPG(pp)	(pp->p_szc != 0)
#define	PFIND(pp)	(page_pptonum(pp) & (pmem_pgcnt - 1))

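/*
 * Worked example of PFIND (descriptive note, not from the original source):
 * with the usual x86 configuration of 4K small pages inside 2M large pages,
 * pmem_pgcnt is 512, so PFIND(pp) masks the page's PFN with 0x1ff.  Because
 * a large page's starting PFN is aligned to pmem_pgcnt, the result is pp's
 * index (0..511) within its enclosing large page, which is also its bit
 * position in the pl_bitmap of the corresponding pmem_lpg_t.
 */
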
/*
 * Structs and static variables used for pmem only.
 */
typedef struct pmem_lpg {
	page_t	*pl_pp;		/* start pp */
	ulong_t	*pl_bitmap;	/* allocation status for each page; 1 = free */
	ushort_t pl_pfree;	/* this large page might be fully freed */
	struct pmem_lpg *pl_next;
	struct pmem_lpg *pl_prev;
} pmem_lpg_t;

static size_t	pmem_lpgsize;	/* the size of one large page */
static pgcnt_t	pmem_pgcnt;	/* the number of small pages in a large page */
static uint_t	pmem_lszc;	/* page size code of the large page */
/* The segment to be associated with all the allocated pages. */
static struct seg	pmem_seg;
/* Fully occupied large pages allocated for pmem. */
static pmem_lpg_t	*pmem_occ_lpgs;
/* Memory pool to store residual small pages from large pages. */
static page_t	*pmem_mpool = NULL;
/* Number of small pages currently residing in pmem_mpool. */
static pgcnt_t	pmem_nmpages = 0;
/* To protect pmem_nmpages, pmem_mpool and pmem_occ_lpgs. */
kmutex_t	pmem_mutex;

static int lpg_isfree(pmem_lpg_t *);
static void pmem_lpg_sub(pmem_lpg_t **, pmem_lpg_t *);
static void pmem_lpg_concat(pmem_lpg_t **, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_get(pmem_lpg_t *, page_t *, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_alloc(uint_t);
static void pmem_lpg_free(pmem_lpg_t **, pmem_lpg_t *);
static void lpg_free(page_t *spp);
static pgcnt_t mpool_break(page_t **, pgcnt_t);
static void mpool_append(page_t **, pgcnt_t);
static void lpp_break(page_t **, pgcnt_t, pgcnt_t, pmem_lpg_t *);
static void lpp_free(page_t *, pgcnt_t, pmem_lpg_t **);
static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **,
    vnode_t *, u_offset_t *, uint_t);
static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *);
static void tlist_out(page_t *, pgcnt_t);
static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t);
static int pmem_lock(pgcnt_t, proc_t *p);

/*
 * Called by a driver's devmap routine to pass physical memory mapping info
 * to the seg_dev framework; used only for physical memory allocated via
 * devmap_pmem_alloc().
 */
/* ARGSUSED */
int
devmap_pmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
    struct devmap_callback_ctl *callbackops, devmap_pmem_cookie_t cookie,
    offset_t off, size_t len, uint_t maxprot, uint_t flags,
    ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * First, check whether this function has already been called for
	 * this dhp.
	 */
	if (dhp->dh_flags & DEVMAP_SETUP_DONE)
		return (DDI_FAILURE);

	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported.  Note that only
	 * uncacheable or write-combining attributes are permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	if (flags & DEVMAP_MAPPING_INVALID) {
		/*
		 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
		 * remap permission.
		 */
		if (!(flags & DEVMAP_ALLOW_REMAP))
			return (DDI_FAILURE);
	} else {
		dhp->dh_pcookie = (devmap_pmem_cookie_t)pcp;
		/* dh_roff is the offset inside the dh_pcookie. */
		dhp->dh_roff = ptob(btop(off));
		/* Set the cache attributes correctly. */
		i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);
	}

	dhp->dh_cookie = DEVMAP_PMEM_COOKIE;
	dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
	dhp->dh_len = ptob(btopr(len));

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);

	if (callbackops != NULL) {
		bcopy(callbackops, &dhp->dh_callbackops,
		    sizeof (struct devmap_callback_ctl));
	}

	/*
	 * Initialize dh_lock if we want to do remap.
	 */
	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
		mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
		dhp->dh_flags |= DEVMAP_LOCK_INITED;
	}

	dhp->dh_flags |= DEVMAP_SETUP_DONE;

	return (DDI_SUCCESS);
}

/*
 * Replace an existing mapping using a new cookie; mainly called during
 * fork().  Should be called from the associated devmap_dup(9E).
 */
/* ARGSUSED */
int
devmap_pmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
    devmap_pmem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
    uint_t flags, ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	/*
	 * Return failure if setup has not been done or no remap permission
	 * has been granted during the setup.
	 */
	if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
	    (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
		return (DDI_FAILURE);

	/* No flags supported for remap yet. */
	if (flags != 0)
		return (DDI_FAILURE);

	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported.  Note that only
	 * uncacheable or write-combining attributes are permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	HOLD_DHP_LOCK(dhp);
	/*
	 * Unload the old mapping of pages associated with this dhp, so the
	 * next fault will set up the new mappings.  It is segdev_faultpage()
	 * that calls hat_devload() to establish the mapping.  Do this while
	 * holding the dhp lock so other faults don't reestablish the
	 * mappings.
	 */
	hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
	    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);

	/* Set the cache attributes correctly. */
	i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);

	dhp->dh_pcookie = cookie;
	dhp->dh_roff = ptob(btop(off));
	dhp->dh_len = ptob(btopr(len));

	/* Clear the large page size flag. */
	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
	RELE_DHP_LOCK(dhp);
	return (DDI_SUCCESS);
}

/*
 * Directly (i.e., without occupying kernel virtual address space) allocate
 * 'npages' physical memory pages for exporting to userland.  The allocated
 * page_t pointers will be recorded in the cookie.
 */
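/*
 * Allocation strategy (a descriptive summary of the code below):
 *   1. Grab as many pages as possible from pmem_mpool, the pool of
 *      residual small pages carved from earlier large pages.
 *   2. If more pages are needed and large pages are available, allocate
 *      large pages with lpp_create() and carve them up, returning any
 *      residue to pmem_mpool via lpp_break().
 *   3. Fall back to ordinary small pages from page_create_va() for any
 *      remainder.
 */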
int
devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep)
{
	u_offset_t pmem_off = 0;
	page_t *pp = NULL;
	page_t *lpp = NULL;
	page_t *tlist = NULL;
	pgcnt_t i = 0;
	pgcnt_t rpages = 0;
	pgcnt_t lpages = 0;
	pgcnt_t tpages = 0;
	pgcnt_t npages = btopr(size);
	pmem_lpg_t *plp = NULL;
	struct devmap_pmem_cookie *pcp;
	uint_t reserved = 0;
	uint_t locked = 0;
	uint_t pflags, kflags;

	*cookiep = NULL;

	/*
	 * A request larger than this would cause page_create_va() to loop
	 * infinitely.
	 */
	if (npages == 0 || npages >= total_pages / 2)
		return (DDI_FAILURE);
	if ((flags & (PMEM_SLEEP | PMEM_NOSLEEP)) == 0)
		return (DDI_FAILURE);
	pflags = flags & PMEM_NOSLEEP ? PG_EXCL : PG_WAIT;
	kflags = flags & PMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP;

	/* Allocate the pmem cookie. */
	if (pmem_cookie_alloc(&pcp, npages, kflags) == DDI_FAILURE)
		return (DDI_FAILURE);
	pcp->dp_npages = npages;

	/*
	 * See if the requested memory can be locked.
	 */
	pcp->dp_proc = curproc;
	if (pmem_lock(npages, curproc) == DDI_FAILURE)
		goto alloc_fail;
	locked = 1;
	/*
	 * First, grab as many pages as possible from pmem_mpool.  If the
	 * pages in pmem_mpool are enough for this request, we are done.
	 */
	mutex_enter(&pmem_mutex);
	tpages = mpool_break(&tlist, npages);
	/* IOlock and hashin them into the new offset. */
	if (tpages)
		tlist_in(tlist, tpages, pcp->dp_vnp, &pmem_off);
	mutex_exit(&pmem_mutex);

	if (tpages == npages)
		goto done;

	rpages = npages - tpages;
	/* Quit now if memory cannot be reserved. */
	if (!page_resv(rpages, kflags))
		goto alloc_fail;
	reserved = 1;

	/* If we have large pages */
	if (pmem_lpgsize > PAGESIZE) {
		/* Try to alloc large pages first to decrease fragmentation. */
		i = (rpages + (pmem_pgcnt - 1)) / pmem_pgcnt;
		if (lpp_create(&lpp, i, &lpages, &plp, pcp->dp_vnp, &pmem_off,
		    kflags) == DDI_FAILURE)
			goto alloc_fail;
		ASSERT(lpages == 0 ? lpp == NULL : 1);
	}

	/*
	 * If the large pages contain more small pages than requested, put
	 * the residual pages into pmem_mpool.
	 */
	if (lpages >= rpages) {
		lpp_break(&lpp, lpages, lpages - rpages, plp);
		goto done;
	}

	/* Allocate small pages if lpp+tlist cannot satisfy the request. */
	i = rpages - lpages;
	if ((pp = page_create_va(pcp->dp_vnp, pmem_off, ptob(i),
	    pflags, &pmem_seg, (caddr_t)(uintptr_t)pmem_off)) == NULL)
		goto alloc_fail;

done:
	page_list_concat(&tlist, &lpp);
	page_list_concat(&tlist, &pp);
	/* Mark those small pages from large pages as allocated. */
	mutex_enter(&pmem_mutex);
	pmem_lpg_concat(&pmem_occ_lpgs, &plp);
	mutex_exit(&pmem_mutex);

	/*
	 * Now tlist holds all the pages for this cookie.  Record these pages
	 * in the pmem cookie.
	 */
	for (pp = tlist, i = 0; i < npages; i++) {
		pcp->dp_pparray[i] = pp;
		page_io_unlock(pp);
		pp = pp->p_next;
		page_sub(&tlist, pp->p_prev);
	}
	ASSERT(tlist == NULL);
	*cookiep = (devmap_pmem_cookie_t)pcp;

	return (DDI_SUCCESS);

alloc_fail:
	DTRACE_PROBE(pmem__alloc__fail);
	/* Free large pages and the associated allocation records. */
	if (lpp)
		lpp_free(lpp, lpages / pmem_pgcnt, &plp);
	if (reserved == 1)
		page_unresv(rpages);
	/* Put those pages in tlist back into pmem_mpool. */
	if (tpages != 0) {
		mutex_enter(&pmem_mutex);
		/* IOunlock, hashout and update the allocation records. */
		tlist_out(tlist, tpages);
		mpool_append(&tlist, tpages);
		mutex_exit(&pmem_mutex);
	}
	if (locked == 1)
		i_ddi_decr_locked_memory(pcp->dp_proc, ptob(pcp->dp_npages));
	/* Free the pmem cookie. */
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
	return (DDI_FAILURE);
}

/*
 * Free all small pages inside the cookie, returning pages that came from
 * large pages to pmem_mpool; if all the pages from one large page end up
 * in pmem_mpool, free that large page as a whole.
 */
void
devmap_pmem_free(devmap_pmem_cookie_t cookie)
{
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t i;
	pgcnt_t tpages = 0;
	page_t *pp;
	pmem_lpg_t *pl1, *plp;
	pmem_lpg_t *pf_lpgs = NULL;
	uint_t npls = 0;
	pmem_lpg_t *last_pl = NULL;
	pmem_lpg_t *plast_pl = NULL;

	ASSERT(pcp);
	mutex_enter(&pmem_mutex);
	/* Free small pages and return them to the memory pool. */
	for (i = pcp->dp_npages; i > 0; i--) {
		pp = pcp->dp_pparray[i - 1];
		page_hashout(pp, NULL);
		/*
		 * Remove the mapping of this single page; this mapping was
		 * created using hat_devload() in segdev_faultpage().
		 */
		(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
		if (!FROM_LPG(pp)) {
			/* Normal small page. */
			page_free(pp, 1);
			page_unresv(1);
		} else {
			/* Small page from large pages. */
			plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
			if (plp && !(plp->pl_pfree)) {
				/*
				 * Move this record to the pf_lpgs list; this
				 * large page may be able to be freed as a
				 * whole.
				 */
				pmem_lpg_sub(&pmem_occ_lpgs, plp);
				pmem_lpg_concat(&pf_lpgs, &plp);
				plp->pl_pfree = 1;
				npls++;
				last_pl = NULL;
			} else {
				/* Search in the pf_lpgs list. */
				plp = pmem_lpg_get(pf_lpgs, pp, &plast_pl);
			}
			ASSERT(plp);
			/* Mark this page as free. */
			BT_SET(plp->pl_bitmap, PFIND(pp));
			/* Record this page in pmem_mpool. */
			mpool_append(&pp, 1);
		}
	}

	/*
	 * Find the large pages whose pages have all been freed, remove them
	 * from the plp list, and free them along with the associated
	 * pmem_lpg structs.
	 */
	for (plp = pf_lpgs; npls != 0; npls--) {
		pl1 = plp;
		plp = plp->pl_next;
		if (lpg_isfree(pl1)) {
			/*
			 * Found one free large page.  Find all pages in this
			 * large page and remove them from pmem_mpool.
			 */
			lpg_free(pl1->pl_pp);
			/* Remove associated allocation records. */
			pmem_lpg_sub(&pf_lpgs, pl1);
			pmem_lpg_free(&pf_lpgs, pl1);
			tpages -= pmem_pgcnt;
		} else
			pl1->pl_pfree = 0;
	}
	/* Update allocation records accordingly. */
	pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs);
	mutex_exit(&pmem_mutex);

	if (curproc == pcp->dp_proc)
		i_ddi_decr_locked_memory(curproc, ptob(pcp->dp_npages));
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
}

/*
 * Extract the page frame numbers from a specified range in a cookie.
 */
int
devmap_pmem_getpfns(devmap_pmem_cookie_t cookie, uint_t start, pgcnt_t npages,
    pfn_t *pfnarray)
{
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t i;

	if (pcp == NULL || start + npages > pcp->dp_npages)
		return (DDI_FAILURE);

	for (i = start; i < start + npages; i++)
		pfnarray[i - start] = pfn_to_mfn(pcp->dp_pparray[i]->p_pagenum);

	return (DDI_SUCCESS);
}

void
pmem_init()
{
	mutex_init(&pmem_mutex, NULL, MUTEX_DEFAULT, NULL);
	pmem_lszc = MIN(1, page_num_pagesizes() - 1);
	pmem_lpgsize = page_get_pagesize(pmem_lszc);
	pmem_pgcnt = pmem_lpgsize >> PAGESHIFT;
	bzero(&pmem_seg, sizeof (struct seg));
	pmem_seg.s_as = &kas;
}
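
/*
 * Worked example (descriptive note, assuming a typical x86 configuration
 * with 4K base pages and 2M as the next supported page size):
 * page_num_pagesizes() returns at least 2, so
 * pmem_lszc = MIN(1, nsizes - 1) = 1, pmem_lpgsize = 2M, and
 * pmem_pgcnt = 2M >> PAGESHIFT = 512.  If only one page size existed,
 * pmem_lszc would be 0, pmem_lpgsize would equal PAGESIZE, and
 * devmap_pmem_alloc() would skip the large-page path entirely.
 */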

/* Allocate kernel memory for one pmem cookie with n pages. */
static int
pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags)
{
	struct devmap_pmem_cookie *pcp;

	if ((*pcpp = kmem_zalloc(sizeof (struct devmap_pmem_cookie),
	    kflags)) == NULL)
		return (DDI_FAILURE);
	pcp = *pcpp;
	if ((pcp->dp_vnp =
	    kmem_zalloc(sizeof (vnode_t), kflags)) == NULL) {
		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
		return (DDI_FAILURE);
	}
	if ((pcp->dp_pparray =
	    kmem_zalloc(n * sizeof (page_t *), kflags)) == NULL) {
		kmem_free(pcp->dp_vnp, sizeof (vnode_t));
		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

/* Try to lock down n pages of memory as a resource charged to process p. */
static int
pmem_lock(pgcnt_t n, proc_t *p)
{
	if (i_ddi_incr_locked_memory(p, ptob(n)) != 0) {
		return (DDI_FAILURE);
	}
	return (DDI_SUCCESS);
}

/* Check whether all the pages in a large page are freed. */
static int
lpg_isfree(pmem_lpg_t *plp)
{
	uint_t i;

	for (i = 0; i < BT_BITOUL(pmem_pgcnt); i++)
		if (plp->pl_bitmap[i] != BT_ULMAXMASK)
			return (0);
	/* All ones means all pages are free. */
	return (1);
}

/*
 * Use pp to find the associated large-page allocation record, searching
 * the splp linked list with *last as the heuristic starting pointer.
 * Return NULL if not found.
 */
static pmem_lpg_t *
pmem_lpg_get(pmem_lpg_t *splp, page_t *pp, pmem_lpg_t **last)
{
	pmem_lpg_t *plp;
	pgcnt_t root_pfn;

	ASSERT(pp);
	if (splp == NULL)
		return (NULL);
	root_pfn = page_pptonum(pp) & ~(pmem_pgcnt - 1);

	/* Try last winner first. */
	if (*last && root_pfn == page_pptonum((*last)->pl_pp))
		goto pl_found;

	/* Else search the whole pmem_lpg list. */
	for (plp = splp; root_pfn != page_pptonum(plp->pl_pp); ) {
		plp = plp->pl_next;
		if (plp == splp) {
			plp = NULL;
			break;
		}
		ASSERT(plp->pl_pp);
	}

	*last = plp;

pl_found:
	return (*last);
}
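
/*
 * Note on the *last heuristic (descriptive note): callers such as
 * tlist_in() and tlist_out() walk page lists in which consecutive pages
 * usually belong to the same large page, so caching the last matching
 * record avoids rescanning the whole circular list for every page.
 */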

/*
 * Remove one pmem_lpg plp from the oplpp list.
 */
static void
pmem_lpg_sub(pmem_lpg_t **oplpp, pmem_lpg_t *plp)
{
	if (*oplpp == plp)
		*oplpp = plp->pl_next;		/* go to next pmem_lpg */

	if (*oplpp == plp)
		*oplpp = NULL;			/* pmem_lpg list is gone */
	else {
		plp->pl_prev->pl_next = plp->pl_next;
		plp->pl_next->pl_prev = plp->pl_prev;
	}
	plp->pl_prev = plp->pl_next = plp;	/* make plp a list of one */
}

/*
 * Concatenate the pmem_lpg list nplpp onto the end of list plpp.
 */
static void
pmem_lpg_concat(pmem_lpg_t **plpp, pmem_lpg_t **nplpp)
{
	pmem_lpg_t *s1p, *s2p, *e1p, *e2p;

	if (*nplpp == NULL) {
		return;
	}
	if (*plpp == NULL) {
		*plpp = *nplpp;
		return;
	}
	s1p = *plpp;
	e1p = s1p->pl_prev;
	s2p = *nplpp;
	e2p = s2p->pl_prev;
	s1p->pl_prev = e2p;
	e2p->pl_next = s1p;
	e1p->pl_next = s2p;
	s2p->pl_prev = e1p;
}
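
/*
 * Splice illustration (descriptive note): both lists are circular and
 * doubly linked, so with plpp = {A, B} and nplpp = {C, D} the four pointer
 * updates above merge the two rings into one: A -> B -> C -> D -> A, with
 * the pl_prev links reversed accordingly.  No allocation is needed and the
 * operation is O(1).
 */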

/*
 * Allocate and initialize the allocation record of one large page; the
 * initial state is 'allocated' (an all-zero bitmap).
 */
static pmem_lpg_t *
pmem_lpg_alloc(uint_t kflags)
{
	pmem_lpg_t *plp;

	ASSERT(pmem_pgcnt % BT_NBIPUL == 0);
	plp = kmem_zalloc(sizeof (pmem_lpg_t), kflags);
	if (plp == NULL)
		return (NULL);
	plp->pl_bitmap = kmem_zalloc(BT_SIZEOFMAP(pmem_pgcnt), kflags);
	if (plp->pl_bitmap == NULL) {
		kmem_free(plp, sizeof (*plp));
		return (NULL);
	}
	plp->pl_next = plp->pl_prev = plp;
	return (plp);
}

/* Free the allocation record plp, removing it from the *headp list. */
static void
pmem_lpg_free(pmem_lpg_t **headp, pmem_lpg_t *plp)
{
	if (*headp == plp)
		*headp = plp->pl_next;		/* go to next pmem_lpg_t */

	if (*headp == plp)
		*headp = NULL;			/* this list is gone */
	else {
		plp->pl_prev->pl_next = plp->pl_next;
		plp->pl_next->pl_prev = plp->pl_prev;
	}
	kmem_free(plp->pl_bitmap, BT_SIZEOFMAP(pmem_pgcnt));
	kmem_free(plp, sizeof (*plp));
}

/* Free one large page headed by spp from pmem_mpool. */
static void
lpg_free(page_t *spp)
{
	page_t *pp1 = spp;
	uint_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (i = 0; i < pmem_pgcnt; i++) {
		/* Break pp1 from pmem_mpool. */
		page_sub(&pmem_mpool, pp1);
		pp1++;
	}
	/* Free the pages in this large page. */
	page_free_pages(spp);
	page_unresv(pmem_pgcnt);
	pmem_nmpages -= pmem_pgcnt;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
}

/* Put n pages in the *ppp list back into pmem_mpool. */
static void
mpool_append(page_t **ppp, pgcnt_t n)
{
	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Put back pages. */
	page_list_concat(&pmem_mpool, ppp);
	pmem_nmpages += n;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
}

/*
 * Try to grab MIN(pmem_nmpages, n) pages from pmem_mpool, put them into the
 * *ppp list, and return the number of grabbed pages.
 */
static pgcnt_t
mpool_break(page_t **ppp, pgcnt_t n)
{
	pgcnt_t i;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	/* Grab the pages. */
	i = MIN(pmem_nmpages, n);
	*ppp = pmem_mpool;
	page_list_break(ppp, &pmem_mpool, i);
	pmem_nmpages -= i;
	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
	return (i);
}

/*
 * Create n large pages; on return, *lpages holds the number of small pages
 * created and *plpp the list of their allocation records.
 */
static int
lpp_create(page_t **lppp, pgcnt_t n, pgcnt_t *lpages, pmem_lpg_t **plpp,
    vnode_t *vnp, u_offset_t *offp, uint_t kflags)
{
	pgcnt_t i;
	pmem_lpg_t *plp;
	page_t *pp;

	for (i = 0, *lpages = 0; i < n; i++) {
		/* Allocate one large page each time. */
		pp = page_create_va_large(vnp, *offp, pmem_lpgsize,
		    PG_EXCL, &pmem_seg, (caddr_t)(uintptr_t)*offp, NULL);
		if (pp == NULL)
			break;
		*offp += pmem_lpgsize;
		page_list_concat(lppp, &pp);
		*lpages += pmem_pgcnt;
		/* Add one allocation record for this large page. */
		if ((plp = pmem_lpg_alloc(kflags)) == NULL)
			return (DDI_FAILURE);
		plp->pl_pp = pp;
		pmem_lpg_concat(plpp, &plp);
	}
	return (DDI_SUCCESS);
}

/*
 * Break the last r small pages from the large-page list *lppp (with n small
 * pages in total) and put them into pmem_mpool.
 */
static void
lpp_break(page_t **lppp, pgcnt_t n, pgcnt_t r, pmem_lpg_t *oplp)
{
	page_t *pp, *pp1;
	pgcnt_t i;
	pmem_lpg_t *plp;

	if (r == 0)
		return;
	ASSERT(*lppp != NULL && r < pmem_pgcnt);
	page_list_break(lppp, &pp, n - r);

	/* The residual should reside in the last large page. */
	plp = oplp->pl_prev;
	/* IOunlock and hashout the residual pages. */
	for (pp1 = pp, i = 0; i < r; i++) {
		page_io_unlock(pp1);
		page_hashout(pp1, NULL);
		/* Mark this page as free. */
		BT_SET(plp->pl_bitmap, PFIND(pp1));
		pp1 = pp1->p_next;
	}
	ASSERT(pp1 == pp);
	/* Put these residual pages into the memory pool. */
	mutex_enter(&pmem_mutex);
	mpool_append(&pp, r);
	mutex_exit(&pmem_mutex);
}

/* Free the large pages in lpp and the associated allocation records in plp. */
static void
lpp_free(page_t *lpp, pgcnt_t lpgs, pmem_lpg_t **plpp)
{
	pgcnt_t i, j;
	page_t *pp = lpp, *pp1;
	pmem_lpg_t *plp1, *plp2;

	for (i = 0; i < lpgs; i++) {
		for (j = 0; j < pmem_pgcnt; j++) {
			/* IOunlock and hashout this small page. */
			page_io_unlock(pp);
			page_hashout(pp, NULL);
			pp1 = pp->p_next;
			pp->p_prev = pp->p_next = pp;
			pp = pp1;
		}
		/* Free one large page at a time. */
		page_free_pages(lpp);
		lpp = pp;
	}
	/* Free the associated pmem large-page allocation records. */
	for (plp1 = *plpp; *plpp; plp1 = plp2) {
		plp2 = plp1->pl_next;
		pmem_lpg_free(plpp, plp1);
	}
}

/*
 * IOlock and hashin all pages in tlist, associating them with vnode *pvnp
 * and offsets starting at *poffp.  Update the allocation records
 * accordingly at the same time.
 */
static void
tlist_in(page_t *tlist, pgcnt_t tpages, vnode_t *pvnp, u_offset_t *poffp)
{
	page_t *pp;
	pgcnt_t i = 0;
	pmem_lpg_t *plp, *last_pl = NULL;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (pp = tlist; i < tpages; i++) {
		ASSERT(FROM_LPG(pp));
		page_io_lock(pp);
		(void) page_hashin(pp, pvnp, *poffp, NULL);
		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
		/* Mark this page as allocated. */
		BT_CLEAR(plp->pl_bitmap, PFIND(pp));
		*poffp += PAGESIZE;
		pp = pp->p_next;
	}
	ASSERT(pp == tlist);
}

/*
 * IOunlock and hashout all pages in tlist, updating the allocation records
 * accordingly at the same time.
 */
static void
tlist_out(page_t *tlist, pgcnt_t tpages)
{
	page_t *pp;
	pgcnt_t i = 0;
	pmem_lpg_t *plp, *last_pl = NULL;

	ASSERT(MUTEX_HELD(&pmem_mutex));
	for (pp = tlist; i < tpages; i++) {
		ASSERT(FROM_LPG(pp));
		page_io_unlock(pp);
		page_hashout(pp, NULL);
		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
		/* Mark this page as free. */
		BT_SET(plp->pl_bitmap, PFIND(pp));
		pp = pp->p_next;
	}
	ASSERT(pp == tlist);
}