xref: /onnv-gate/usr/src/uts/common/os/bp_map.c (revision 5251:aad4f9e16063)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
270Sstevel@tonic-gate 
280Sstevel@tonic-gate #include <sys/types.h>
290Sstevel@tonic-gate #include <sys/sysmacros.h>
300Sstevel@tonic-gate #include <sys/systm.h>
310Sstevel@tonic-gate #include <sys/mman.h>
320Sstevel@tonic-gate #include <sys/buf.h>
330Sstevel@tonic-gate #include <sys/vmem.h>
340Sstevel@tonic-gate #include <sys/cmn_err.h>
350Sstevel@tonic-gate #include <sys/debug.h>
360Sstevel@tonic-gate #include <sys/machparam.h>
370Sstevel@tonic-gate #include <vm/page.h>
380Sstevel@tonic-gate #include <vm/seg_kmem.h>
391299Scth #include <vm/seg_kpm.h>
400Sstevel@tonic-gate 
/*
 * BP_FLUSH(addr, size): on sparc, calls flush_instr_mem() on the given
 * range; on every other platform it expands to a no-op expression.
 *
 * The expansion is a single expression with no trailing semicolon, so
 * "BP_FLUSH(a, s);" is exactly one statement on all platforms.
 */
#ifdef __sparc
#include <sys/cpu_module.h>
#define	BP_FLUSH(addr, size)	flush_instr_mem((void *)(addr), (size))
#else
#define	BP_FLUSH(addr, size)	((void)0)
#endif
470Sstevel@tonic-gate 
48*5251Smrj int bp_force_copy = 0;
49*5251Smrj typedef enum {
50*5251Smrj 	BP_COPYIN	= 0,
51*5251Smrj 	BP_COPYOUT	= 1
52*5251Smrj } bp_copydir_t;
53*5251Smrj static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
54*5251Smrj     offset_t offset, size_t size);
55*5251Smrj 
560Sstevel@tonic-gate static vmem_t *bp_map_arena;
570Sstevel@tonic-gate static size_t bp_align;
580Sstevel@tonic-gate static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC;
591299Scth int	bp_max_cache = 1 << 17;		/* 128K default; tunable */
601299Scth int	bp_mapin_kpm_enable = 1;	/* enable default; tunable */
610Sstevel@tonic-gate 
620Sstevel@tonic-gate static void *
bp_vmem_alloc(vmem_t * vmp,size_t size,int vmflag)630Sstevel@tonic-gate bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
640Sstevel@tonic-gate {
650Sstevel@tonic-gate 	return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag));
660Sstevel@tonic-gate }
670Sstevel@tonic-gate 
680Sstevel@tonic-gate void
bp_init(size_t align,uint_t devload_flags)690Sstevel@tonic-gate bp_init(size_t align, uint_t devload_flags)
700Sstevel@tonic-gate {
710Sstevel@tonic-gate 	bp_align = MAX(align, PAGESIZE);
720Sstevel@tonic-gate 	bp_devload_flags |= devload_flags;
730Sstevel@tonic-gate 
740Sstevel@tonic-gate 	if (bp_align <= bp_max_cache)
750Sstevel@tonic-gate 		bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align,
760Sstevel@tonic-gate 		    bp_vmem_alloc, vmem_free, heap_arena,
770Sstevel@tonic-gate 		    MIN(8 * bp_align, bp_max_cache), VM_SLEEP);
780Sstevel@tonic-gate }
790Sstevel@tonic-gate 
800Sstevel@tonic-gate /*
810Sstevel@tonic-gate  * common routine so can be called with/without VM_SLEEP
820Sstevel@tonic-gate  */
830Sstevel@tonic-gate void *
bp_mapin_common(struct buf * bp,int flag)840Sstevel@tonic-gate bp_mapin_common(struct buf *bp, int flag)
850Sstevel@tonic-gate {
861299Scth 	struct as	*as;
871299Scth 	pfn_t		pfnum;
881299Scth 	page_t		*pp;
891299Scth 	page_t		**pplist;
901299Scth 	caddr_t		kaddr;
911299Scth 	caddr_t		addr;
921299Scth 	uintptr_t	off;
931299Scth 	size_t		size;
941299Scth 	pgcnt_t		npages;
951299Scth 	int		color;
960Sstevel@tonic-gate 
970Sstevel@tonic-gate 	/* return if already mapped in, no pageio/physio, or physio to kas */
980Sstevel@tonic-gate 	if ((bp->b_flags & B_REMAPPED) ||
990Sstevel@tonic-gate 	    !(bp->b_flags & (B_PAGEIO | B_PHYS)) ||
1000Sstevel@tonic-gate 	    (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) &&
1010Sstevel@tonic-gate 	    ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas))))
1020Sstevel@tonic-gate 		return (bp->b_un.b_addr);
1030Sstevel@tonic-gate 
1040Sstevel@tonic-gate 	ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS));
1050Sstevel@tonic-gate 
1061299Scth 	addr = (caddr_t)bp->b_un.b_addr;
1071299Scth 	off = (uintptr_t)addr & PAGEOFFSET;
1081299Scth 	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
1091299Scth 	npages = btop(size);
1101299Scth 
1111299Scth 	/* Fastpath single page IO to locked memory by using kpm. */
1121299Scth 	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
1131299Scth 	    kpm_enable && bp_mapin_kpm_enable) {
1141299Scth 		if (bp->b_flags & B_SHADOW)
1151299Scth 			pp = *bp->b_shadow;
1161299Scth 		else
1171299Scth 			pp = bp->b_pages;
1181299Scth 		kaddr = hat_kpm_mapin(pp, NULL);
1191299Scth 		bp->b_un.b_addr = kaddr + off;
1201299Scth 		bp->b_flags |= B_REMAPPED;
1211299Scth 		return (bp->b_un.b_addr);
1221299Scth 	}
1231299Scth 
1240Sstevel@tonic-gate 	/*
1250Sstevel@tonic-gate 	 * Allocate kernel virtual space for remapping.
1260Sstevel@tonic-gate 	 */
1270Sstevel@tonic-gate 	color = bp_color(bp);
1280Sstevel@tonic-gate 	ASSERT(color < bp_align);
1290Sstevel@tonic-gate 
1300Sstevel@tonic-gate 	if (bp_map_arena != NULL) {
1310Sstevel@tonic-gate 		kaddr = (caddr_t)vmem_alloc(bp_map_arena,
1320Sstevel@tonic-gate 		    P2ROUNDUP(color + size, bp_align), flag);
1330Sstevel@tonic-gate 		if (kaddr == NULL)
1340Sstevel@tonic-gate 			return (NULL);
1350Sstevel@tonic-gate 		kaddr += color;
1360Sstevel@tonic-gate 	} else {
1370Sstevel@tonic-gate 		kaddr = vmem_xalloc(heap_arena, size, bp_align, color,
1380Sstevel@tonic-gate 		    0, NULL, NULL, flag);
1390Sstevel@tonic-gate 		if (kaddr == NULL)
1400Sstevel@tonic-gate 			return (NULL);
1410Sstevel@tonic-gate 	}
1420Sstevel@tonic-gate 
1430Sstevel@tonic-gate 	ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color);
1440Sstevel@tonic-gate 
1450Sstevel@tonic-gate 	/*
1460Sstevel@tonic-gate 	 * Map bp into the virtual space we just allocated.
1470Sstevel@tonic-gate 	 */
1480Sstevel@tonic-gate 	if (bp->b_flags & B_PAGEIO) {
1490Sstevel@tonic-gate 		pp = bp->b_pages;
1501299Scth 		pplist = NULL;
1510Sstevel@tonic-gate 	} else if (bp->b_flags & B_SHADOW) {
1521299Scth 		pp = NULL;
1530Sstevel@tonic-gate 		pplist = bp->b_shadow;
1540Sstevel@tonic-gate 	} else {
1551299Scth 		pp = NULL;
1561299Scth 		pplist = NULL;
1570Sstevel@tonic-gate 		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL)
1580Sstevel@tonic-gate 			as = &kas;
1590Sstevel@tonic-gate 	}
1600Sstevel@tonic-gate 
1610Sstevel@tonic-gate 	bp->b_flags |= B_REMAPPED;
1620Sstevel@tonic-gate 	bp->b_un.b_addr = kaddr + off;
1630Sstevel@tonic-gate 
1640Sstevel@tonic-gate 	while (npages-- != 0) {
1650Sstevel@tonic-gate 		if (pp) {
1660Sstevel@tonic-gate 			pfnum = pp->p_pagenum;
1670Sstevel@tonic-gate 			pp = pp->p_next;
1680Sstevel@tonic-gate 		} else if (pplist == NULL) {
1691299Scth 			pfnum = hat_getpfnum(as->a_hat,
1701299Scth 			    (caddr_t)((uintptr_t)addr & MMU_PAGEMASK));
1711299Scth 			if (pfnum == PFN_INVALID)
1720Sstevel@tonic-gate 				panic("bp_mapin_common: hat_getpfnum for"
1730Sstevel@tonic-gate 				    " addr %p failed\n", (void *)addr);
1740Sstevel@tonic-gate 			addr += PAGESIZE;
1750Sstevel@tonic-gate 		} else {
1760Sstevel@tonic-gate 			pfnum = (*pplist)->p_pagenum;
1770Sstevel@tonic-gate 			pplist++;
1780Sstevel@tonic-gate 		}
1790Sstevel@tonic-gate 
1800Sstevel@tonic-gate 		hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
1810Sstevel@tonic-gate 		    bp_devload_flags, HAT_LOAD_LOCK);
1820Sstevel@tonic-gate 
1830Sstevel@tonic-gate 		kaddr += PAGESIZE;
1840Sstevel@tonic-gate 	}
1850Sstevel@tonic-gate 	return (bp->b_un.b_addr);
1860Sstevel@tonic-gate }
1870Sstevel@tonic-gate 
1880Sstevel@tonic-gate /*
1890Sstevel@tonic-gate  * Convert bp for pageio/physio to a kernel addressable location.
1900Sstevel@tonic-gate  */
1910Sstevel@tonic-gate void
bp_mapin(struct buf * bp)1920Sstevel@tonic-gate bp_mapin(struct buf *bp)
1930Sstevel@tonic-gate {
1940Sstevel@tonic-gate 	(void) bp_mapin_common(bp, VM_SLEEP);
1950Sstevel@tonic-gate }
1960Sstevel@tonic-gate 
1970Sstevel@tonic-gate /*
1980Sstevel@tonic-gate  * Release all the resources associated with a previous bp_mapin() call.
1990Sstevel@tonic-gate  */
2000Sstevel@tonic-gate void
bp_mapout(struct buf * bp)2010Sstevel@tonic-gate bp_mapout(struct buf *bp)
2020Sstevel@tonic-gate {
2031299Scth 	caddr_t		addr;
2041299Scth 	uintptr_t	off;
2051299Scth 	uintptr_t	base;
2061299Scth 	uintptr_t	color;
2071299Scth 	size_t		size;
2081299Scth 	pgcnt_t		npages;
2091299Scth 	page_t		*pp;
2101299Scth 
2111299Scth 	if ((bp->b_flags & B_REMAPPED) == 0)
2121299Scth 		return;
2131299Scth 
2141299Scth 	addr = bp->b_un.b_addr;
2151299Scth 	off = (uintptr_t)addr & PAGEOFFSET;
2161299Scth 	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
2171299Scth 	npages = btop(size);
2181299Scth 
2191299Scth 	bp->b_un.b_addr = (caddr_t)off;		/* debugging aid */
2201299Scth 
2211299Scth 	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
2221299Scth 	    kpm_enable && bp_mapin_kpm_enable) {
2231299Scth 		if (bp->b_flags & B_SHADOW)
2241299Scth 			pp = *bp->b_shadow;
2250Sstevel@tonic-gate 		else
2261299Scth 			pp = bp->b_pages;
2272236Scth 		addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK);
2281299Scth 		hat_kpm_mapout(pp, NULL, addr);
2290Sstevel@tonic-gate 		bp->b_flags &= ~B_REMAPPED;
2301299Scth 		return;
2310Sstevel@tonic-gate 	}
2321299Scth 
2331299Scth 	base = (uintptr_t)addr & MMU_PAGEMASK;
2341299Scth 	BP_FLUSH(base, size);
2351299Scth 	hat_unload(kas.a_hat, (void *)base, size,
2361299Scth 	    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
2371299Scth 	if (bp_map_arena != NULL) {
2381299Scth 		color = P2PHASE(base, bp_align);
2391299Scth 		vmem_free(bp_map_arena, (void *)(base - color),
2401299Scth 		    P2ROUNDUP(color + size, bp_align));
2411299Scth 	} else
2421299Scth 		vmem_free(heap_arena, (void *)base, size);
2431299Scth 	bp->b_flags &= ~B_REMAPPED;
2440Sstevel@tonic-gate }
245*5251Smrj 
246*5251Smrj /*
247*5251Smrj  * copy data from a KVA into a buf_t which may not be mapped in. offset
248*5251Smrj  * is relative to the buf_t only.
249*5251Smrj  */
250*5251Smrj int
bp_copyout(void * driverbuf,struct buf * bp,offset_t offset,size_t size)251*5251Smrj bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size)
252*5251Smrj {
253*5251Smrj 	return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size));
254*5251Smrj }
255*5251Smrj 
256*5251Smrj /*
257*5251Smrj  * copy data from a buf_t which may not be mapped in, into a KVA.. offset
258*5251Smrj  * is relative to the buf_t only.
259*5251Smrj  */
260*5251Smrj int
bp_copyin(struct buf * bp,void * driverbuf,offset_t offset,size_t size)261*5251Smrj bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size)
262*5251Smrj {
263*5251Smrj 	return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size));
264*5251Smrj }
265*5251Smrj 
266*5251Smrj 
/*
 * BP_COPY(dir, driverbuf, baddr, sz): bcopy sz bytes between the driver
 * buffer and the buf_t address baddr. BP_COPYIN copies baddr -> driverbuf;
 * any other direction copies driverbuf -> baddr. Arguments and the whole
 * expansion are parenthesized so it is safe with arbitrary expressions.
 */
#define	BP_COPY(dir, driverbuf, baddr, sz)	\
	(((dir) == BP_COPYIN) ? \
	bcopy((baddr), (driverbuf), (sz)) : bcopy((driverbuf), (baddr), (sz)))
270*5251Smrj 
271*5251Smrj static int
bp_copy_common(bp_copydir_t dir,struct buf * bp,void * driverbuf,offset_t offset,size_t size)272*5251Smrj bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
273*5251Smrj     offset_t offset, size_t size)
274*5251Smrj {
275*5251Smrj 	page_t **pplist;
276*5251Smrj 	uintptr_t poff;
277*5251Smrj 	uintptr_t voff;
278*5251Smrj 	struct as *as;
279*5251Smrj 	caddr_t kaddr;
280*5251Smrj 	caddr_t addr;
281*5251Smrj 	page_t *page;
282*5251Smrj 	size_t psize;
283*5251Smrj 	page_t *pp;
284*5251Smrj 	pfn_t pfn;
285*5251Smrj 
286*5251Smrj 
287*5251Smrj 	ASSERT((offset + size) <= bp->b_bcount);
288*5251Smrj 
289*5251Smrj 	/* if the buf_t already has a KVA, just do a bcopy */
290*5251Smrj 	if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) {
291*5251Smrj 		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
292*5251Smrj 		return (0);
293*5251Smrj 	}
294*5251Smrj 
295*5251Smrj 	/* if we don't have kpm enabled, we need to do the slow path */
296*5251Smrj 	if (!kpm_enable || bp_force_copy) {
297*5251Smrj 		bp_mapin(bp);
298*5251Smrj 		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
299*5251Smrj 		bp_mapout(bp);
300*5251Smrj 		return (0);
301*5251Smrj 	}
302*5251Smrj 
303*5251Smrj 	/*
304*5251Smrj 	 * kpm is enabled, and we need to map in the buf_t for the copy
305*5251Smrj 	 */
306*5251Smrj 
307*5251Smrj 	/* setup pp, plist, and make sure 'as' is right */
308*5251Smrj 	if (bp->b_flags & B_PAGEIO) {
309*5251Smrj 		pp = bp->b_pages;
310*5251Smrj 		pplist = NULL;
311*5251Smrj 	} else if (bp->b_flags & B_SHADOW) {
312*5251Smrj 		pp = NULL;
313*5251Smrj 		pplist = bp->b_shadow;
314*5251Smrj 	} else {
315*5251Smrj 		pp = NULL;
316*5251Smrj 		pplist = NULL;
317*5251Smrj 		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) {
318*5251Smrj 			as = &kas;
319*5251Smrj 		}
320*5251Smrj 	}
321*5251Smrj 
322*5251Smrj 	/*
323*5251Smrj 	 * locals for the address, the offset into the first page, and the
324*5251Smrj 	 * size of the first page we are going to copy.
325*5251Smrj 	 */
326*5251Smrj 	addr = (caddr_t)bp->b_un.b_addr;
327*5251Smrj 	poff = (uintptr_t)addr & PAGEOFFSET;
328*5251Smrj 	psize = MIN(PAGESIZE - poff, size);
329*5251Smrj 
330*5251Smrj 	/*
331*5251Smrj 	 * we always start with a 0 offset into the driverbuf provided. The
332*5251Smrj 	 * offset passed in only applies to the buf_t.
333*5251Smrj 	 */
334*5251Smrj 	voff = 0;
335*5251Smrj 
336*5251Smrj 	/* Loop until we've copied al the data */
337*5251Smrj 	while (size > 0) {
338*5251Smrj 
339*5251Smrj 		/*
340*5251Smrj 		 * for a pp or pplist, get the pfn, then go to the next page_t
341*5251Smrj 		 * for the next time around the loop.
342*5251Smrj 		 */
343*5251Smrj 		if (pp) {
344*5251Smrj 			page = pp;
345*5251Smrj 			pp = pp->p_next;
346*5251Smrj 		} else if (pplist != NULL) {
347*5251Smrj 			page = (*pplist);
348*5251Smrj 			pplist++;
349*5251Smrj 
350*5251Smrj 		/*
351*5251Smrj 		 * We have a user VA. If we are going to copy this page, (e.g.
352*5251Smrj 		 * the offset into the buf_t where we start to copy is
353*5251Smrj 		 * within this page), get the pfn. Don't waste the cycles
354*5251Smrj 		 * getting the pfn if we're not copying this page.
355*5251Smrj 		 */
356*5251Smrj 		} else if (offset < psize) {
357*5251Smrj 			pfn = hat_getpfnum(as->a_hat,
358*5251Smrj 			    (caddr_t)((uintptr_t)addr & PAGEMASK));
359*5251Smrj 			if (pfn == PFN_INVALID) {
360*5251Smrj 				return (-1);
361*5251Smrj 			}
362*5251Smrj 			page = page_numtopp_nolock(pfn);
363*5251Smrj 			addr += psize - offset;
364*5251Smrj 		} else {
365*5251Smrj 			addr += psize;
366*5251Smrj 		}
367*5251Smrj 
368*5251Smrj 		/*
369*5251Smrj 		 * if we have an initial offset into the buf_t passed in,
370*5251Smrj 		 * and it falls within the current page, account for it in
371*5251Smrj 		 * the page size (how much we will copy) and the offset into the
372*5251Smrj 		 * page (where we'll start copying from).
373*5251Smrj 		 */
374*5251Smrj 		if ((offset > 0) && (offset < psize)) {
375*5251Smrj 			psize -= offset;
376*5251Smrj 			poff += offset;
377*5251Smrj 			offset = 0;
378*5251Smrj 
379*5251Smrj 		/*
380*5251Smrj 		 * if we have an initial offset into the buf_t passed in,
381*5251Smrj 		 * and it's not within the current page, skip this page.
382*5251Smrj 		 * We don't have to worry about the first page offset and size
383*5251Smrj 		 * anymore. psize will normally be PAGESIZE now unless we are
384*5251Smrj 		 * on the last page.
385*5251Smrj 		 */
386*5251Smrj 		} else if (offset >= psize) {
387*5251Smrj 			offset -= psize;
388*5251Smrj 			psize = MIN(PAGESIZE, size);
389*5251Smrj 			poff = 0;
390*5251Smrj 			continue;
391*5251Smrj 		}
392*5251Smrj 
393*5251Smrj 		/*
394*5251Smrj 		 * get a kpm mapping to the page, them copy in/out of the
395*5251Smrj 		 * page. update size left and offset into the driverbuf passed
396*5251Smrj 		 * in for the next time around the loop.
397*5251Smrj 		 */
398*5251Smrj 		kaddr = hat_kpm_mapin(page, NULL) + poff;
399*5251Smrj 		BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr,
400*5251Smrj 		    psize);
401*5251Smrj 		hat_kpm_mapout(page, NULL, kaddr - poff);
402*5251Smrj 
403*5251Smrj 		size -= psize;
404*5251Smrj 		voff += psize;
405*5251Smrj 
406*5251Smrj 		poff = 0;
407*5251Smrj 		psize = MIN(PAGESIZE, size);
408*5251Smrj 	}
409*5251Smrj 
410*5251Smrj 	return (0);
411*5251Smrj }
412