xref: /onnv-gate/usr/src/uts/sun4v/os/memseg.c (revision 11185:f0c31008e395)
110106SJason.Beloro@Sun.COM /*
210106SJason.Beloro@Sun.COM  *
310106SJason.Beloro@Sun.COM  * CDDL HEADER START
410106SJason.Beloro@Sun.COM  *
510106SJason.Beloro@Sun.COM  * The contents of this file are subject to the terms of the
610106SJason.Beloro@Sun.COM  * Common Development and Distribution License (the "License").
710106SJason.Beloro@Sun.COM  * You may not use this file except in compliance with the License.
810106SJason.Beloro@Sun.COM  *
910106SJason.Beloro@Sun.COM  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1010106SJason.Beloro@Sun.COM  * or http://www.opensolaris.org/os/licensing.
1110106SJason.Beloro@Sun.COM  * See the License for the specific language governing permissions
1210106SJason.Beloro@Sun.COM  * and limitations under the License.
1310106SJason.Beloro@Sun.COM  *
1410106SJason.Beloro@Sun.COM  * When distributing Covered Code, include this CDDL HEADER in each
1510106SJason.Beloro@Sun.COM  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1610106SJason.Beloro@Sun.COM  * If applicable, add the following below this CDDL HEADER, with the
1710106SJason.Beloro@Sun.COM  * fields enclosed by brackets "[]" replaced with your own identifying
1810106SJason.Beloro@Sun.COM  * information: Portions Copyright [yyyy] [name of copyright owner]
1910106SJason.Beloro@Sun.COM  *
2010106SJason.Beloro@Sun.COM  * CDDL HEADER END
2110106SJason.Beloro@Sun.COM  */
2210106SJason.Beloro@Sun.COM /*
2310106SJason.Beloro@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
2410106SJason.Beloro@Sun.COM  * Use is subject to license terms.
2510106SJason.Beloro@Sun.COM  */
2610106SJason.Beloro@Sun.COM 
2710106SJason.Beloro@Sun.COM #include <sys/types.h>
2810106SJason.Beloro@Sun.COM #include <sys/cmn_err.h>
2910106SJason.Beloro@Sun.COM #include <sys/vm.h>
3010106SJason.Beloro@Sun.COM #include <sys/mman.h>
3110106SJason.Beloro@Sun.COM #include <vm/vm_dep.h>
3210106SJason.Beloro@Sun.COM #include <vm/seg_kmem.h>
3310106SJason.Beloro@Sun.COM #include <vm/seg_kpm.h>
3410106SJason.Beloro@Sun.COM #include <sys/mem_config.h>
3510106SJason.Beloro@Sun.COM #include <sys/sysmacros.h>
3610106SJason.Beloro@Sun.COM 
3710106SJason.Beloro@Sun.COM extern pgcnt_t pp_dummy_npages;
3810106SJason.Beloro@Sun.COM extern pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */
3910106SJason.Beloro@Sun.COM 
4010106SJason.Beloro@Sun.COM extern kmutex_t memseg_lists_lock;
4110106SJason.Beloro@Sun.COM extern struct memseg *memseg_va_avail;
4210106SJason.Beloro@Sun.COM extern struct memseg *memseg_alloc();
4310106SJason.Beloro@Sun.COM 
4410106SJason.Beloro@Sun.COM extern page_t *ppvm_base;
4510106SJason.Beloro@Sun.COM extern pgcnt_t ppvm_size;
4610106SJason.Beloro@Sun.COM 
4710106SJason.Beloro@Sun.COM static int sun4v_memseg_debug;
4810106SJason.Beloro@Sun.COM 
4910106SJason.Beloro@Sun.COM extern struct memseg *memseg_reuse(pgcnt_t);
5010106SJason.Beloro@Sun.COM extern void remap_to_dummy(caddr_t, pgcnt_t);
5110106SJason.Beloro@Sun.COM 
/*
 * The page_t memory for incoming pages is allocated from existing memory,
 * which can create a situation where memory addition fails because of a
 * shortage of existing memory.  To mitigate this, some memory is always
 * reserved ahead of time for page_t allocation.  Each 4MB of reserved
 * page_t's guarantees a 256MB (x64) addition without page_t allocation.
 * The 256MB of added memory could, in turn, theoretically allow an
 * addition of 16GB.
 */
6110106SJason.Beloro@Sun.COM #define	RSV_SIZE	0x40000000	/* add size with rsrvd page_t's 1G */
6210106SJason.Beloro@Sun.COM 
/*
 * MEMSEG_DEBUG(fmt, ...) prints a debug message via printf() on DEBUG
 * kernels when sun4v_memseg_debug is non-zero, and expands to a no-op
 * expression otherwise.
 *
 * The DEBUG form is wrapped in do { } while (0) so that the macro behaves
 * as a single statement; a bare "if (...) printf(...)" would capture the
 * "else" of an enclosing unbraced if/else.
 */
#ifdef	DEBUG
#define	MEMSEG_DEBUG(...)					\
	do {							\
		if (sun4v_memseg_debug)				\
			printf(__VA_ARGS__);			\
	/*CONSTCOND*/						\
	} while (0)
#else
#define	MEMSEG_DEBUG(...)	((void)0)
#endif
6810106SJason.Beloro@Sun.COM 
6910106SJason.Beloro@Sun.COM /*
7010106SJason.Beloro@Sun.COM  * The page_t's for the incoming memory are allocated from
7110106SJason.Beloro@Sun.COM  * existing pages.
7210106SJason.Beloro@Sun.COM  */
7310106SJason.Beloro@Sun.COM /*ARGSUSED*/
7410106SJason.Beloro@Sun.COM int
memseg_alloc_meta(pfn_t base,pgcnt_t npgs,void ** ptp,pgcnt_t * metap)7510106SJason.Beloro@Sun.COM memseg_alloc_meta(pfn_t base, pgcnt_t npgs, void **ptp, pgcnt_t *metap)
7610106SJason.Beloro@Sun.COM {
77*11185SSean.McEnroe@Sun.COM 	page_t		*pp, *opp, *epp;
7810106SJason.Beloro@Sun.COM 	pgcnt_t		metapgs;
79*11185SSean.McEnroe@Sun.COM 	int		i;
8010106SJason.Beloro@Sun.COM 	struct seg	kseg;
8110106SJason.Beloro@Sun.COM 	caddr_t		vaddr;
8210106SJason.Beloro@Sun.COM 
8310106SJason.Beloro@Sun.COM 	/*
8410106SJason.Beloro@Sun.COM 	 * Verify incoming memory is within supported DR range.
8510106SJason.Beloro@Sun.COM 	 */
8610106SJason.Beloro@Sun.COM 	if ((base + npgs) * sizeof (page_t) > ppvm_size)
8710106SJason.Beloro@Sun.COM 		return (KPHYSM_ENOTSUP);
8810106SJason.Beloro@Sun.COM 
8910106SJason.Beloro@Sun.COM 	opp = pp = ppvm_base + base;
9010106SJason.Beloro@Sun.COM 	epp = pp + npgs;
9110106SJason.Beloro@Sun.COM 	metapgs = btopr(npgs * sizeof (page_t));
9210106SJason.Beloro@Sun.COM 
9310106SJason.Beloro@Sun.COM 	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
94*11185SSean.McEnroe@Sun.COM 	    page_find(&mpvp, (u_offset_t)pp)) {
9510106SJason.Beloro@Sun.COM 		/*
9610106SJason.Beloro@Sun.COM 		 * Another memseg has page_t's in the same
9710106SJason.Beloro@Sun.COM 		 * page which 'pp' resides.  This would happen
9810106SJason.Beloro@Sun.COM 		 * if PAGESIZE is not an integral multiple of
9910106SJason.Beloro@Sun.COM 		 * sizeof (page_t) and therefore 'pp'
10010106SJason.Beloro@Sun.COM 		 * does not start on a page boundry.
10110106SJason.Beloro@Sun.COM 		 *
10210106SJason.Beloro@Sun.COM 		 * Since the other memseg's pages_t's still
10310106SJason.Beloro@Sun.COM 		 * map valid pages, skip allocation of this page.
10410106SJason.Beloro@Sun.COM 		 * Advance 'pp' to the next page which should
10510106SJason.Beloro@Sun.COM 		 * belong only to the incoming memseg.
10610106SJason.Beloro@Sun.COM 		 *
10710106SJason.Beloro@Sun.COM 		 * If the last page_t in the current page
10810106SJason.Beloro@Sun.COM 		 * crosses a page boundary, this should still
10910106SJason.Beloro@Sun.COM 		 * work.  The first part of the page_t is
11010106SJason.Beloro@Sun.COM 		 * already allocated.  The second part of
11110106SJason.Beloro@Sun.COM 		 * the page_t will be allocated below.
11210106SJason.Beloro@Sun.COM 		 */
11310106SJason.Beloro@Sun.COM 		ASSERT(PAGESIZE % sizeof (page_t));
11410106SJason.Beloro@Sun.COM 		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
11510106SJason.Beloro@Sun.COM 		metapgs--;
11610106SJason.Beloro@Sun.COM 	}
11710106SJason.Beloro@Sun.COM 
11810106SJason.Beloro@Sun.COM 	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
119*11185SSean.McEnroe@Sun.COM 	    page_find(&mpvp, (u_offset_t)epp)) {
12010106SJason.Beloro@Sun.COM 		/*
12110106SJason.Beloro@Sun.COM 		 * Another memseg has page_t's in the same
12210106SJason.Beloro@Sun.COM 		 * page which 'epp' resides.  This would happen
12310106SJason.Beloro@Sun.COM 		 * if PAGESIZE is not an integral multiple of
12410106SJason.Beloro@Sun.COM 		 * sizeof (page_t) and therefore 'epp'
12510106SJason.Beloro@Sun.COM 		 * does not start on a page boundry.
12610106SJason.Beloro@Sun.COM 		 *
12710106SJason.Beloro@Sun.COM 		 * Since the other memseg's pages_t's still
12810106SJason.Beloro@Sun.COM 		 * map valid pages, skip allocation of this page.
12910106SJason.Beloro@Sun.COM 		 */
13010106SJason.Beloro@Sun.COM 		ASSERT(PAGESIZE % sizeof (page_t));
13110106SJason.Beloro@Sun.COM 		metapgs--;
13210106SJason.Beloro@Sun.COM 	}
13310106SJason.Beloro@Sun.COM 
13410106SJason.Beloro@Sun.COM 	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
13510106SJason.Beloro@Sun.COM 
13610106SJason.Beloro@Sun.COM 	/*
13710106SJason.Beloro@Sun.COM 	 * Back metadata space with physical pages.
13810106SJason.Beloro@Sun.COM 	 */
13910106SJason.Beloro@Sun.COM 	kseg.s_as = &kas;
14010106SJason.Beloro@Sun.COM 	vaddr = (caddr_t)pp;
14110106SJason.Beloro@Sun.COM 
14210106SJason.Beloro@Sun.COM 	for (i = 0; i < metapgs; i++)
143*11185SSean.McEnroe@Sun.COM 		if (page_find(&mpvp, (u_offset_t)(vaddr + i * PAGESIZE)))
14410106SJason.Beloro@Sun.COM 			panic("page_find(0x%p, %p)\n",
145*11185SSean.McEnroe@Sun.COM 			    (void *)&mpvp, (void *)(vaddr + i * PAGESIZE));
14610106SJason.Beloro@Sun.COM 
14710106SJason.Beloro@Sun.COM 	/*
14810106SJason.Beloro@Sun.COM 	 * Allocate the metadata pages; these are the pages that will
14910106SJason.Beloro@Sun.COM 	 * contain the page_t's for the incoming memory.
15010106SJason.Beloro@Sun.COM 	 */
151*11185SSean.McEnroe@Sun.COM 	if ((page_create_va(&mpvp, (u_offset_t)pp, ptob(metapgs),
15210106SJason.Beloro@Sun.COM 	    PG_NORELOC | PG_EXCL, &kseg, vaddr)) == NULL) {
153*11185SSean.McEnroe@Sun.COM 		MEMSEG_DEBUG("memseg_alloc_meta: can't get 0x%ld metapgs",
15410106SJason.Beloro@Sun.COM 		    metapgs);
15510106SJason.Beloro@Sun.COM 		return (KPHYSM_ERESOURCE);
15610106SJason.Beloro@Sun.COM 	}
15710106SJason.Beloro@Sun.COM 
15810106SJason.Beloro@Sun.COM 	ASSERT(ptp);
15910106SJason.Beloro@Sun.COM 	ASSERT(metap);
16010106SJason.Beloro@Sun.COM 
16110106SJason.Beloro@Sun.COM 	*ptp = (void *)opp;
16210106SJason.Beloro@Sun.COM 	*metap = metapgs;
16310106SJason.Beloro@Sun.COM 
16410106SJason.Beloro@Sun.COM 	return (KPHYSM_OK);
16510106SJason.Beloro@Sun.COM }
16610106SJason.Beloro@Sun.COM 
16710106SJason.Beloro@Sun.COM void
memseg_free_meta(void * ptp,pgcnt_t metapgs)16810106SJason.Beloro@Sun.COM memseg_free_meta(void *ptp, pgcnt_t metapgs)
16910106SJason.Beloro@Sun.COM {
17010106SJason.Beloro@Sun.COM 	int i;
17110106SJason.Beloro@Sun.COM 	page_t *pp;
17210106SJason.Beloro@Sun.COM 	u_offset_t off;
17310106SJason.Beloro@Sun.COM 
17410106SJason.Beloro@Sun.COM 	if (!metapgs)
17510106SJason.Beloro@Sun.COM 		return;
17610106SJason.Beloro@Sun.COM 
17710106SJason.Beloro@Sun.COM 	off = (u_offset_t)ptp;
17810106SJason.Beloro@Sun.COM 
17910106SJason.Beloro@Sun.COM 	ASSERT(off);
18010106SJason.Beloro@Sun.COM 	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
18110106SJason.Beloro@Sun.COM 
18210106SJason.Beloro@Sun.COM 	MEMSEG_DEBUG("memseg_free_meta: off=0x%lx metapgs=0x%lx\n",
18310106SJason.Beloro@Sun.COM 	    (uint64_t)off, metapgs);
18410106SJason.Beloro@Sun.COM 	/*
18510106SJason.Beloro@Sun.COM 	 * Free pages allocated during add.
18610106SJason.Beloro@Sun.COM 	 */
18710106SJason.Beloro@Sun.COM 	for (i = 0; i < metapgs; i++) {
188*11185SSean.McEnroe@Sun.COM 		pp = page_find(&mpvp, off);
18910106SJason.Beloro@Sun.COM 		ASSERT(pp);
19010106SJason.Beloro@Sun.COM 		ASSERT(pp->p_szc == 0);
19110106SJason.Beloro@Sun.COM 		page_io_unlock(pp);
19210106SJason.Beloro@Sun.COM 		page_destroy(pp, 0);
19310106SJason.Beloro@Sun.COM 		off += PAGESIZE;
19410106SJason.Beloro@Sun.COM 	}
19510106SJason.Beloro@Sun.COM }
19610106SJason.Beloro@Sun.COM 
19710106SJason.Beloro@Sun.COM pfn_t
memseg_get_metapfn(void * ptp,pgcnt_t metapg)19810106SJason.Beloro@Sun.COM memseg_get_metapfn(void *ptp, pgcnt_t metapg)
19910106SJason.Beloro@Sun.COM {
20010106SJason.Beloro@Sun.COM 	page_t *pp;
20110106SJason.Beloro@Sun.COM 	u_offset_t off;
20210106SJason.Beloro@Sun.COM 
20310106SJason.Beloro@Sun.COM 	off = (u_offset_t)ptp + ptob(metapg);
20410106SJason.Beloro@Sun.COM 
20510106SJason.Beloro@Sun.COM 	ASSERT(off);
20610106SJason.Beloro@Sun.COM 	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
20710106SJason.Beloro@Sun.COM 
208*11185SSean.McEnroe@Sun.COM 	pp = page_find(&mpvp, off);
20910106SJason.Beloro@Sun.COM 	ASSERT(pp);
21010106SJason.Beloro@Sun.COM 	ASSERT(pp->p_szc == 0);
21110106SJason.Beloro@Sun.COM 	ASSERT(pp->p_pagenum != PFN_INVALID);
21210106SJason.Beloro@Sun.COM 
21310106SJason.Beloro@Sun.COM 	return (pp->p_pagenum);
21410106SJason.Beloro@Sun.COM }
21510106SJason.Beloro@Sun.COM 
21610106SJason.Beloro@Sun.COM /*
21710106SJason.Beloro@Sun.COM  * Remap a memseg's page_t's to dummy pages.  Skip the low/high
21810106SJason.Beloro@Sun.COM  * ends of the range if they are already in use.
21910106SJason.Beloro@Sun.COM  */
22010106SJason.Beloro@Sun.COM void
memseg_remap_meta(struct memseg * seg)22110106SJason.Beloro@Sun.COM memseg_remap_meta(struct memseg *seg)
22210106SJason.Beloro@Sun.COM {
22310106SJason.Beloro@Sun.COM 	int i;
22410106SJason.Beloro@Sun.COM 	u_offset_t off;
22510106SJason.Beloro@Sun.COM 	page_t *pp;
22610106SJason.Beloro@Sun.COM #if 0
22710106SJason.Beloro@Sun.COM 	page_t *epp;
22810106SJason.Beloro@Sun.COM #endif
22910106SJason.Beloro@Sun.COM 	pgcnt_t metapgs;
23010106SJason.Beloro@Sun.COM 
23110106SJason.Beloro@Sun.COM 	metapgs = btopr(MSEG_NPAGES(seg) * sizeof (page_t));
23210106SJason.Beloro@Sun.COM 	ASSERT(metapgs);
23310106SJason.Beloro@Sun.COM 	pp = seg->pages;
23410106SJason.Beloro@Sun.COM 	seg->pages_end = seg->pages_base;
23510106SJason.Beloro@Sun.COM #if 0
23610106SJason.Beloro@Sun.COM 	epp = seg->epages;
23710106SJason.Beloro@Sun.COM 
23810106SJason.Beloro@Sun.COM 	/*
23910106SJason.Beloro@Sun.COM 	 * This code cannot be tested as the kernel does not compile
24010106SJason.Beloro@Sun.COM 	 * when page_t size is changed.  It is left here as a starting
24110106SJason.Beloro@Sun.COM 	 * point if the unaligned page_t size needs to be supported.
24210106SJason.Beloro@Sun.COM 	 */
24310106SJason.Beloro@Sun.COM 
24410106SJason.Beloro@Sun.COM 	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
245*11185SSean.McEnroe@Sun.COM 	    page_find(&mpvp, (u_offset_t)(pp - 1)) && !page_deleted(pp - 1)) {
24610106SJason.Beloro@Sun.COM 		/*
24710106SJason.Beloro@Sun.COM 		 * Another memseg has page_t's in the same
24810106SJason.Beloro@Sun.COM 		 * page which 'pp' resides.  This would happen
24910106SJason.Beloro@Sun.COM 		 * if PAGESIZE is not an integral multiple of
25010106SJason.Beloro@Sun.COM 		 * sizeof (page_t) and therefore 'seg->pages'
25110106SJason.Beloro@Sun.COM 		 * does not start on a page boundry.
25210106SJason.Beloro@Sun.COM 		 *
25310106SJason.Beloro@Sun.COM 		 * Since the other memseg's pages_t's still
25410106SJason.Beloro@Sun.COM 		 * map valid pages, skip remap of this page.
25510106SJason.Beloro@Sun.COM 		 * Advance 'pp' to the next page which should
25610106SJason.Beloro@Sun.COM 		 * belong only to the outgoing memseg.
25710106SJason.Beloro@Sun.COM 		 *
25810106SJason.Beloro@Sun.COM 		 * If the last page_t in the current page
25910106SJason.Beloro@Sun.COM 		 * crosses a page boundary, this should still
26010106SJason.Beloro@Sun.COM 		 * work.  The first part of the page_t is
26110106SJason.Beloro@Sun.COM 		 * valid since memseg_lock_delete_all() has
26210106SJason.Beloro@Sun.COM 		 * been called.  The second part of the page_t
26310106SJason.Beloro@Sun.COM 		 * will be remapped to the corresponding
26410106SJason.Beloro@Sun.COM 		 * dummy page below.
26510106SJason.Beloro@Sun.COM 		 */
26610106SJason.Beloro@Sun.COM 		ASSERT(PAGESIZE % sizeof (page_t));
26710106SJason.Beloro@Sun.COM 		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
26810106SJason.Beloro@Sun.COM 		metapgs--;
26910106SJason.Beloro@Sun.COM 	}
27010106SJason.Beloro@Sun.COM 
27110106SJason.Beloro@Sun.COM 	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
272*11185SSean.McEnroe@Sun.COM 	    page_find(&mpvp, (u_offset_t)epp) && !page_deleted(epp)) {
27310106SJason.Beloro@Sun.COM 		/*
27410106SJason.Beloro@Sun.COM 		 * Another memseg has page_t's in the same
27510106SJason.Beloro@Sun.COM 		 * page which 'epp' resides.  This would happen
27610106SJason.Beloro@Sun.COM 		 * if PAGESIZE is not an integral multiple of
27710106SJason.Beloro@Sun.COM 		 * sizeof (page_t) and therefore 'seg->epages'
27810106SJason.Beloro@Sun.COM 		 * does not start on a page boundry.
27910106SJason.Beloro@Sun.COM 		 *
28010106SJason.Beloro@Sun.COM 		 * Since the other memseg's pages_t's still
28110106SJason.Beloro@Sun.COM 		 * map valid pages, skip remap of this page.
28210106SJason.Beloro@Sun.COM 		 */
28310106SJason.Beloro@Sun.COM 		ASSERT(PAGESIZE % sizeof (page_t));
28410106SJason.Beloro@Sun.COM 		metapgs--;
28510106SJason.Beloro@Sun.COM 	}
28610106SJason.Beloro@Sun.COM #endif
28710106SJason.Beloro@Sun.COM 	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
28810106SJason.Beloro@Sun.COM 
28910106SJason.Beloro@Sun.COM 	remap_to_dummy((caddr_t)pp, metapgs);
29010106SJason.Beloro@Sun.COM 
29110106SJason.Beloro@Sun.COM 	off = (u_offset_t)pp;
29210106SJason.Beloro@Sun.COM 
293*11185SSean.McEnroe@Sun.COM 	MEMSEG_DEBUG("memseg_remap_meta: off=0x%lx metapgs=0x%lx\n",
294*11185SSean.McEnroe@Sun.COM 	    (uint64_t)off, metapgs);
29510106SJason.Beloro@Sun.COM 	/*
29610106SJason.Beloro@Sun.COM 	 * Free pages allocated during add.
29710106SJason.Beloro@Sun.COM 	 */
29810106SJason.Beloro@Sun.COM 	for (i = 0; i < metapgs; i++) {
299*11185SSean.McEnroe@Sun.COM 		pp = page_find(&mpvp, off);
30010106SJason.Beloro@Sun.COM 		ASSERT(pp);
30110106SJason.Beloro@Sun.COM 		ASSERT(pp->p_szc == 0);
30210106SJason.Beloro@Sun.COM 		page_io_unlock(pp);
30310106SJason.Beloro@Sun.COM 		page_destroy(pp, 0);
30410106SJason.Beloro@Sun.COM 		off += PAGESIZE;
30510106SJason.Beloro@Sun.COM 	}
30610106SJason.Beloro@Sun.COM }
307