xref: /onnv-gate/usr/src/uts/sun4v/os/ppage.c (revision 6461:037a423f52ad)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
51517Sjb145095  * Common Development and Distribution License (the "License").
61517Sjb145095  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
22*6461Scb222892  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <sys/types.h>
290Sstevel@tonic-gate #include <sys/systm.h>
300Sstevel@tonic-gate #include <sys/archsystm.h>
310Sstevel@tonic-gate #include <sys/machsystm.h>
320Sstevel@tonic-gate #include <sys/t_lock.h>
330Sstevel@tonic-gate #include <sys/vmem.h>
340Sstevel@tonic-gate #include <sys/mman.h>
350Sstevel@tonic-gate #include <sys/vm.h>
360Sstevel@tonic-gate #include <sys/cpu.h>
370Sstevel@tonic-gate #include <sys/cmn_err.h>
380Sstevel@tonic-gate #include <sys/cpuvar.h>
390Sstevel@tonic-gate #include <sys/atomic.h>
400Sstevel@tonic-gate #include <vm/as.h>
410Sstevel@tonic-gate #include <vm/hat.h>
420Sstevel@tonic-gate #include <vm/as.h>
430Sstevel@tonic-gate #include <vm/page.h>
440Sstevel@tonic-gate #include <vm/seg.h>
450Sstevel@tonic-gate #include <vm/seg_kmem.h>
46*6461Scb222892 #include <vm/seg_kpm.h>
470Sstevel@tonic-gate #include <vm/hat_sfmmu.h>
480Sstevel@tonic-gate #include <sys/debug.h>
490Sstevel@tonic-gate #include <sys/cpu_module.h>
500Sstevel@tonic-gate 
510Sstevel@tonic-gate /*
520Sstevel@tonic-gate  * A quick way to generate a cache consistent address to map in a page.
530Sstevel@tonic-gate  * users: ppcopy, pagezero, /proc, dev/mem
540Sstevel@tonic-gate  *
550Sstevel@tonic-gate  * The ppmapin/ppmapout routines provide a quick way of generating a cache
560Sstevel@tonic-gate  * consistent address by reserving a given amount of kernel address space.
570Sstevel@tonic-gate  * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
580Sstevel@tonic-gate  * into x number of sets, where x is the number of colors for the virtual
590Sstevel@tonic-gate  * cache. The number of colors is how many times a page can be mapped
600Sstevel@tonic-gate  * simultaneously in the cache.  For direct map caches this translates to
610Sstevel@tonic-gate  * the number of pages in the cache.
620Sstevel@tonic-gate  * Each set will be assigned a group of virtual pages from the reserved memory
630Sstevel@tonic-gate  * depending on its virtual color.
640Sstevel@tonic-gate  * When trying to assign a virtual address we will find out the color for the
650Sstevel@tonic-gate  * physical page in question (if applicable).  Then we will try to find an
660Sstevel@tonic-gate  * available virtual page from the set of the appropriate color.
670Sstevel@tonic-gate  */
680Sstevel@tonic-gate 
690Sstevel@tonic-gate int pp_slots = 4;		/* small default, tuned by cpu module */
700Sstevel@tonic-gate 
710Sstevel@tonic-gate /* tuned by cpu module, default is "safe" */
720Sstevel@tonic-gate int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
730Sstevel@tonic-gate 
740Sstevel@tonic-gate static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
750Sstevel@tonic-gate static int	nsets;			/* number of sets */
760Sstevel@tonic-gate static int	ppmap_shift;		/* set selector */
770Sstevel@tonic-gate 
780Sstevel@tonic-gate #ifdef PPDEBUG
790Sstevel@tonic-gate #define		MAXCOLORS	16	/* for debug only */
800Sstevel@tonic-gate static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
812296Sae112802 static int	align_hits;
820Sstevel@tonic-gate static int	pp_allocs;		/* # of ppmapin requests */
830Sstevel@tonic-gate #endif /* PPDEBUG */
840Sstevel@tonic-gate 
850Sstevel@tonic-gate /*
860Sstevel@tonic-gate  * There are only 64 TLB entries on spitfire, 16 on cheetah
870Sstevel@tonic-gate  * (fully-associative TLB) so we allow the cpu module to tune the
880Sstevel@tonic-gate  * number to use here via pp_slots.
890Sstevel@tonic-gate  */
900Sstevel@tonic-gate static struct ppmap_va {
910Sstevel@tonic-gate 	caddr_t	ppmap_slots[MAXPP_SLOTS];
920Sstevel@tonic-gate } ppmap_va[NCPU];
930Sstevel@tonic-gate 
/*
 * The sun4v ppmapin/ppmapout implementation does not handle a VAC, so
 * refuse to build if VAC is defined.
 */
#if defined(VAC)
#error "sun4v ppmapin and ppmapout do not support VAC"
#endif /* VAC */
982296Sae112802 
990Sstevel@tonic-gate void
ppmapinit(void)1000Sstevel@tonic-gate ppmapinit(void)
1010Sstevel@tonic-gate {
1022296Sae112802 	int nset;
1030Sstevel@tonic-gate 	caddr_t va;
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate 	ASSERT(pp_slots <= MAXPP_SLOTS);
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate 	va = (caddr_t)PPMAPBASE;
1080Sstevel@tonic-gate 
1092296Sae112802 	/*
1102296Sae112802 	 * sun4v does not have a virtual indexed cache and simply
1112296Sae112802 	 * has only one set containing all pages.
1122296Sae112802 	 */
1132296Sae112802 	nsets = mmu_btop(PPMAPSIZE);
1142296Sae112802 	ppmap_shift = MMU_PAGESHIFT;
1152296Sae112802 
1162296Sae112802 	for (nset = 0; nset < nsets; nset++) {
1172296Sae112802 		ppmap_vaddrs[nset] =
1182296Sae112802 		    (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
1190Sstevel@tonic-gate 	}
1200Sstevel@tonic-gate }
1210Sstevel@tonic-gate 
1220Sstevel@tonic-gate /*
1230Sstevel@tonic-gate  * Allocate a cache consistent virtual address to map a page, pp,
1240Sstevel@tonic-gate  * with protection, vprot; and map it in the MMU, using the most
1250Sstevel@tonic-gate  * efficient means possible.  The argument avoid is a virtual address
1260Sstevel@tonic-gate  * hint which when masked yields an offset into a virtual cache
1270Sstevel@tonic-gate  * that should be avoided when allocating an address to map in a
1280Sstevel@tonic-gate  * page.  An avoid arg of -1 means you don't care, for instance pagezero.
1290Sstevel@tonic-gate  *
1300Sstevel@tonic-gate  * machine dependent, depends on virtual address space layout,
1310Sstevel@tonic-gate  * understands that all kernel addresses have bit 31 set.
1320Sstevel@tonic-gate  *
1330Sstevel@tonic-gate  * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
1340Sstevel@tonic-gate  * that found in other architectures.  In other architectures the hint
1350Sstevel@tonic-gate  * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
1360Sstevel@tonic-gate  * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
1370Sstevel@tonic-gate  * in the case of a COW, this later on caused a cache aliasing conflict.  In
1380Sstevel@tonic-gate  * sun4, the bcopy routine uses the block ld/st instructions so we don't have
1390Sstevel@tonic-gate  * to worry about virtual cache thrashing.  By using the hint to choose the
1400Sstevel@tonic-gate  * right color we can almost guarantee a cache conflict will not occur.
1410Sstevel@tonic-gate  */
1420Sstevel@tonic-gate 
1432296Sae112802 /*ARGSUSED2*/
1440Sstevel@tonic-gate caddr_t
ppmapin(page_t * pp,uint_t vprot,caddr_t hint)1450Sstevel@tonic-gate ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
1460Sstevel@tonic-gate {
1472296Sae112802 	int nset;
1480Sstevel@tonic-gate 	caddr_t va;
1490Sstevel@tonic-gate 
1500Sstevel@tonic-gate #ifdef PPDEBUG
1510Sstevel@tonic-gate 	pp_allocs++;
1520Sstevel@tonic-gate #endif /* PPDEBUG */
1532296Sae112802 
1542296Sae112802 	/*
1552296Sae112802 	 * For sun4v caches are physical caches, we can pick any address
1562296Sae112802 	 * we want.
1572296Sae112802 	 */
1582296Sae112802 	for (nset = 0; nset < nsets; nset++) {
1592296Sae112802 		va = ppmap_vaddrs[nset];
1602296Sae112802 		if (va != NULL) {
1612296Sae112802 #ifdef PPDEBUG
1622296Sae112802 			align_hits++;
1632296Sae112802 #endif /* PPDEBUG */
1642296Sae112802 			if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) {
1652296Sae112802 				hat_memload(kas.a_hat, va, pp,
166*6461Scb222892 				    vprot | HAT_NOSYNC,
167*6461Scb222892 				    HAT_LOAD_LOCK);
1682296Sae112802 				return (va);
1690Sstevel@tonic-gate 			}
1700Sstevel@tonic-gate 		}
1710Sstevel@tonic-gate 	}
1720Sstevel@tonic-gate 
1730Sstevel@tonic-gate #ifdef PPDEBUG
1740Sstevel@tonic-gate 	ppalloc_noslot++;
1750Sstevel@tonic-gate #endif /* PPDEBUG */
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	/*
1780Sstevel@tonic-gate 	 * No free slots; get a random one from the kernel heap area.
1790Sstevel@tonic-gate 	 */
1800Sstevel@tonic-gate 	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
1810Sstevel@tonic-gate 
1820Sstevel@tonic-gate 	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
1830Sstevel@tonic-gate 
1840Sstevel@tonic-gate 	return (va);
1850Sstevel@tonic-gate 
1860Sstevel@tonic-gate }
1870Sstevel@tonic-gate 
1880Sstevel@tonic-gate void
ppmapout(caddr_t va)1890Sstevel@tonic-gate ppmapout(caddr_t va)
1900Sstevel@tonic-gate {
1912296Sae112802 	int nset;
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 	if (va >= kernelheap && va < ekernelheap) {
1940Sstevel@tonic-gate 		/*
1950Sstevel@tonic-gate 		 * Space came from kernelmap, flush the page and
1960Sstevel@tonic-gate 		 * return the space.
1970Sstevel@tonic-gate 		 */
1980Sstevel@tonic-gate 		hat_unload(kas.a_hat, va, PAGESIZE,
1990Sstevel@tonic-gate 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
2000Sstevel@tonic-gate 		vmem_free(heap_arena, va, PAGESIZE);
2010Sstevel@tonic-gate 	} else {
2020Sstevel@tonic-gate 		/*
2030Sstevel@tonic-gate 		 * Space came from ppmap_vaddrs[], give it back.
2040Sstevel@tonic-gate 		 */
2050Sstevel@tonic-gate 		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
2060Sstevel@tonic-gate 		hat_unload(kas.a_hat, va, PAGESIZE,
2070Sstevel@tonic-gate 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
2080Sstevel@tonic-gate 
2092296Sae112802 		ASSERT(ppmap_vaddrs[nset] == NULL);
2102296Sae112802 		ppmap_vaddrs[nset] = va;
2110Sstevel@tonic-gate 	}
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate 
/*
 * Statistics counters exist only in DEBUG kernels; in non-DEBUG builds
 * PP_STAT_ADD() expands to nothing.
 */
#if defined(DEBUG)
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#define	PP_STAT_ADD(stat)	(stat)++
#else /* !DEBUG */
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */
2210Sstevel@tonic-gate 
2220Sstevel@tonic-gate static void
pp_unload_tlb(caddr_t * pslot,caddr_t va)2230Sstevel@tonic-gate pp_unload_tlb(caddr_t *pslot, caddr_t va)
2240Sstevel@tonic-gate {
2250Sstevel@tonic-gate 	ASSERT(*pslot == va);
2260Sstevel@tonic-gate 
2272241Shuah 	vtag_flushpage(va, (uint64_t)ksfmmup);
2280Sstevel@tonic-gate 	*pslot = NULL;				/* release the slot */
2290Sstevel@tonic-gate }
2300Sstevel@tonic-gate 
2310Sstevel@tonic-gate /*
2320Sstevel@tonic-gate  * Routine to copy kernel pages during relocation.  It will copy one
2330Sstevel@tonic-gate  * PAGESIZE page to another PAGESIZE page.  This function may be called
2340Sstevel@tonic-gate  * above LOCK_LEVEL so it should not grab any locks.
2350Sstevel@tonic-gate  */
2360Sstevel@tonic-gate void
ppcopy_kernel__relocatable(page_t * fm_pp,page_t * to_pp)2370Sstevel@tonic-gate ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
2380Sstevel@tonic-gate {
2390Sstevel@tonic-gate 	uint64_t fm_pa, to_pa;
2400Sstevel@tonic-gate 	size_t nbytes;
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
2430Sstevel@tonic-gate 	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
2440Sstevel@tonic-gate 
2450Sstevel@tonic-gate 	nbytes = MMU_PAGESIZE;
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate 	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
2480Sstevel@tonic-gate 		hw_pa_bcopy32(fm_pa, to_pa);
2490Sstevel@tonic-gate }
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate /*
2520Sstevel@tonic-gate  * Copy the data from the physical page represented by "frompp" to
2530Sstevel@tonic-gate  * that represented by "topp".
2540Sstevel@tonic-gate  *
2550Sstevel@tonic-gate  * Try to use per cpu mapping first, if that fails then call pp_mapin
2560Sstevel@tonic-gate  * to load it.
2573253Smec  * Returns one on success or zero on some sort of fault while doing the copy.
2580Sstevel@tonic-gate  */
2593253Smec int
ppcopy(page_t * fm_pp,page_t * to_pp)2600Sstevel@tonic-gate ppcopy(page_t *fm_pp, page_t *to_pp)
2610Sstevel@tonic-gate {
262*6461Scb222892 	caddr_t fm_va = NULL;
2631517Sjb145095 	caddr_t to_va;
2641517Sjb145095 	boolean_t fast;
2653253Smec 	label_t ljb;
2663253Smec 	int ret = 1;
2671517Sjb145095 
2681517Sjb145095 	ASSERT(PAGE_LOCKED(fm_pp));
2691517Sjb145095 	ASSERT(PAGE_LOCKED(to_pp));
2700Sstevel@tonic-gate 
2711517Sjb145095 	/*
272*6461Scb222892 	 * Try to map using KPM if enabled.  If it fails, fall
273*6461Scb222892 	 * back to ppmapin/ppmapout.
2741517Sjb145095 	 */
275*6461Scb222892 	if ((kpm_enable == 0) ||
276*6461Scb222892 	    (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
2771517Sjb145095 	    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
2781517Sjb145095 		if (fm_va != NULL)
2791517Sjb145095 			hat_kpm_mapout(fm_pp, NULL, fm_va);
2801517Sjb145095 		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
2811517Sjb145095 		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
2821517Sjb145095 		fast = B_FALSE;
2831517Sjb145095 	} else
2841517Sjb145095 		fast = B_TRUE;
2851517Sjb145095 
2863253Smec 	if (on_fault(&ljb)) {
2873253Smec 		ret = 0;
2883253Smec 		goto faulted;
2893253Smec 	}
2900Sstevel@tonic-gate 	bcopy(fm_va, to_va, PAGESIZE);
2913253Smec 	no_fault();
2923253Smec faulted:
2931517Sjb145095 
2941517Sjb145095 	/* Unmap */
2951517Sjb145095 	if (fast) {
2961517Sjb145095 		hat_kpm_mapout(fm_pp, NULL, fm_va);
2971517Sjb145095 		hat_kpm_mapout(to_pp, NULL, to_va);
2981517Sjb145095 	} else {
2991517Sjb145095 		ppmapout(fm_va);
3001517Sjb145095 		ppmapout(to_va);
3011517Sjb145095 	}
3023253Smec 	return (ret);
3030Sstevel@tonic-gate }
3040Sstevel@tonic-gate 
3050Sstevel@tonic-gate /*
3060Sstevel@tonic-gate  * Zero the physical page from off to off + len given by `pp'
3070Sstevel@tonic-gate  * without changing the reference and modified bits of page.
3080Sstevel@tonic-gate  *
3090Sstevel@tonic-gate  * Again, we'll try per cpu mapping first.
3100Sstevel@tonic-gate  */
3111517Sjb145095 
3120Sstevel@tonic-gate void
pagezero(page_t * pp,uint_t off,uint_t len)3130Sstevel@tonic-gate pagezero(page_t *pp, uint_t off, uint_t len)
3140Sstevel@tonic-gate {
3150Sstevel@tonic-gate 	caddr_t va;
3160Sstevel@tonic-gate 	extern int hwblkclr(void *, size_t);
3170Sstevel@tonic-gate 	extern int use_hw_bzero;
3181517Sjb145095 	boolean_t fast;
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate 	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
3210Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(pp));
3220Sstevel@tonic-gate 
3230Sstevel@tonic-gate 	PP_STAT_ADD(ppzero);
3240Sstevel@tonic-gate 
3250Sstevel@tonic-gate 	if (len != MMU_PAGESIZE || !use_hw_bzero) {
3260Sstevel@tonic-gate 		PP_STAT_ADD(ppzero_short);
3270Sstevel@tonic-gate 	}
3280Sstevel@tonic-gate 
3290Sstevel@tonic-gate 	kpreempt_disable();
3300Sstevel@tonic-gate 
3311517Sjb145095 	/*
332*6461Scb222892 	 * Try to use KPM if enabled.  If that fails, fall back to
3331517Sjb145095 	 * ppmapin/ppmapout.
3341517Sjb145095 	 */
335*6461Scb222892 
336*6461Scb222892 	if (kpm_enable != 0) {
337*6461Scb222892 		fast = B_TRUE;
338*6461Scb222892 		va = hat_kpm_mapin(pp, NULL);
339*6461Scb222892 	} else
340*6461Scb222892 		va = NULL;
341*6461Scb222892 
3421517Sjb145095 	if (va == NULL) {
3431517Sjb145095 		fast = B_FALSE;
3441517Sjb145095 		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
3451517Sjb145095 	}
3460Sstevel@tonic-gate 
3470Sstevel@tonic-gate 	if (!use_hw_bzero) {
3480Sstevel@tonic-gate 		bzero(va + off, len);
3490Sstevel@tonic-gate 		sync_icache(va + off, len);
3500Sstevel@tonic-gate 	} else if (hwblkclr(va + off, len)) {
3510Sstevel@tonic-gate 		/*
3520Sstevel@tonic-gate 		 * We may not have used block commit asi.
3530Sstevel@tonic-gate 		 * So flush the I-$ manually
3540Sstevel@tonic-gate 		 */
3550Sstevel@tonic-gate 		sync_icache(va + off, len);
3560Sstevel@tonic-gate 	} else {
3570Sstevel@tonic-gate 		/*
3581517Sjb145095 		 * We have used blk commit, and flushed the I-$.
3591517Sjb145095 		 * However we still may have an instruction in the
3601517Sjb145095 		 * pipeline. Only a flush will invalidate that.
3610Sstevel@tonic-gate 		 */
3620Sstevel@tonic-gate 		doflush(va);
3630Sstevel@tonic-gate 	}
3640Sstevel@tonic-gate 
3651517Sjb145095 	if (fast) {
3661517Sjb145095 		hat_kpm_mapout(pp, NULL, va);
3671517Sjb145095 	} else {
3681517Sjb145095 		ppmapout(va);
3691517Sjb145095 	}
3700Sstevel@tonic-gate 	kpreempt_enable();
3710Sstevel@tonic-gate }
372